1 /* 2 * Copyright 2017 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "si_shader_internal.h" 25 #include "si_pipe.h" 26 #include "sid.h" 27 #include "gallivm/lp_bld_arit.h" 28 #include "gallivm/lp_bld_gather.h" 29 #include "gallivm/lp_bld_intr.h" 30 #include "tgsi/tgsi_build.h" 31 #include "tgsi/tgsi_parse.h" 32 #include "tgsi/tgsi_util.h" 33 34 static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, 35 struct lp_build_tgsi_context *bld_base, 36 struct lp_build_emit_data *emit_data); 37 38 static const struct lp_build_tgsi_action tex_action; 39 40 /** 41 * Given a v8i32 resource descriptor for a buffer, extract the size of the 42 * buffer in number of elements and return it as an i32. 
 */
static LLVMValueRef get_buffer_size(
	struct lp_build_tgsi_context *bld_base,
	LLVMValueRef descriptor)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	/* The size is stored in dword 2 of the buffer descriptor. */
	LLVMValueRef size =
		LLVMBuildExtractElement(builder, descriptor,
					LLVMConstInt(ctx->i32, 2, 0), "");

	if (ctx->screen->info.chip_class == VI) {
		/* On VI, the descriptor contains the size in bytes,
		 * but TXQ must return the size in elements.
		 * The stride is always non-zero for resources using TXQ.
		 */
		LLVMValueRef stride =
			LLVMBuildExtractElement(builder, descriptor,
						ctx->i32_1, "");
		/* The stride is a 14-bit field starting at bit 16 of dword 1. */
		stride = LLVMBuildLShr(builder, stride,
				       LLVMConstInt(ctx->i32, 16, 0), "");
		stride = LLVMBuildAnd(builder, stride,
				      LLVMConstInt(ctx->i32, 0x3FFF, 0), "");

		size = LLVMBuildUDiv(builder, size, stride, "");
	}

	return size;
}

/**
 * Return the resource descriptor for the shader buffer (SSBO) or uniform
 * buffer (UBO) addressed by \p reg, resolving indirect indexing if present.
 *
 * \param ubo  true for a UBO (TGSI_FILE_CONSTBUF), false for an SSBO
 */
static LLVMValueRef
shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
			 const struct tgsi_full_src_register *reg,
			 bool ubo)
{
	LLVMValueRef index;

	if (!reg->Register.Indirect) {
		index = LLVMConstInt(ctx->i32, reg->Register.Index, false);
	} else {
		index = si_get_indirect_index(ctx, &reg->Indirect,
					      1, reg->Register.Index);
	}

	if (ubo)
		return ctx->abi.load_ubo(&ctx->abi, index);
	else
		return ctx->abi.load_ssbo(&ctx->abi, index, false);
}

/* Return true for image targets whose access takes an array-style (DA)
 * coordinate. Note that 3D and cube images are deliberately included. */
static bool tgsi_is_array_image(unsigned target)
{
	return target == TGSI_TEXTURE_3D ||
	       target == TGSI_TEXTURE_CUBE ||
	       target == TGSI_TEXTURE_1D_ARRAY ||
	       target == TGSI_TEXTURE_2D_ARRAY ||
	       target == TGSI_TEXTURE_CUBE_ARRAY ||
	       target == TGSI_TEXTURE_2D_ARRAY_MSAA;
}

/**
 * Given a 256-bit resource descriptor, force the DCC enable bit to off.
 *
 * At least on Tonga, executing image stores on images with DCC enabled and
 * non-trivial can eventually lead to lockups.
 * This can occur when an
 * application binds an image as read-only but then uses a shader that writes
 * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
 * program termination) in this case, but it doesn't cost much to be a bit
 * nicer: disabling DCC in the shader still leads to undefined results but
 * avoids the lockup.
 */
static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
				  LLVMValueRef rsrc)
{
	if (ctx->screen->info.chip_class <= CIK) {
		/* Only chips above CIK need this workaround. */
		return rsrc;
	} else {
		LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
		LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0);
		LLVMValueRef tmp;

		/* Clear the COMPRESSION_EN bit in dword 6 of the descriptor. */
		tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, "");
		tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, "");
		return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, "");
	}
}

/**
 * Load an image or buffer-image descriptor from a descriptor list.
 *
 * \param list       pointer to the descriptor array
 * \param index      index of the descriptor to load
 * \param desc_type  AC_DESC_IMAGE or AC_DESC_BUFFER
 * \param dcc_off    if true, clear the DCC enable bit (image descriptors only)
 */
LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
				LLVMValueRef list, LLVMValueRef index,
				enum ac_descriptor_type desc_type, bool dcc_off)
{
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef rsrc;

	if (desc_type == AC_DESC_BUFFER) {
		/* The v4i32 buffer view is stored at element 2*index + 1 of
		 * the list when it is viewed as an array of v4i32. */
		index = LLVMBuildMul(builder, index,
				     LLVMConstInt(ctx->i32, 2, 0), "");
		index = LLVMBuildAdd(builder, index,
				     ctx->i32_1, "");
		list = LLVMBuildPointerCast(builder, list,
					    si_const_array(ctx->v4i32, 0), "");
	} else {
		assert(desc_type == AC_DESC_IMAGE);
	}

	rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
	if (desc_type == AC_DESC_IMAGE && dcc_off)
		rsrc = force_dcc_off(ctx, rsrc);
	return rsrc;
}

/**
 * Load the resource descriptor for \p image.
 */
static void
image_fetch_rsrc(
	struct lp_build_tgsi_context *bld_base,
	const struct tgsi_full_src_register *image,
	bool is_store, unsigned target,
	LLVMValueRef *rsrc)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn,
					     ctx->param_samplers_and_images);
	LLVMValueRef index;
	/* Stores always need DCC disabled; loads may too (see below). */
	bool dcc_off = is_store;

	if (!image->Register.Indirect) {
		const struct tgsi_shader_info *info = bld_base->info;
		unsigned images_writemask = info->images_store |
					    info->images_atomic;

		index = LLVMConstInt(ctx->i32,
				     si_get_image_slot(image->Register.Index), 0);

		/* Any image the shader also writes (store/atomic) gets DCC off. */
		if (images_writemask & (1 << image->Register.Index))
			dcc_off = true;
	} else {
		/* From the GL_ARB_shader_image_load_store extension spec:
		 *
		 *    If a shader performs an image load, store, or atomic
		 *    operation using an image variable declared as an array,
		 *    and if the index used to select an individual element is
		 *    negative or greater than or equal to the size of the
		 *    array, the results of the operation are undefined but may
		 *    not lead to termination.
		 */
		index = si_get_bounded_indirect_index(ctx, &image->Indirect,
						      image->Register.Index,
						      ctx->num_images);
		/* NOTE(review): the index is mirrored (SI_NUM_IMAGES-1 - index);
		 * presumably image slots are laid out in reverse order — confirm
		 * against si_get_image_slot. */
		index = LLVMBuildSub(ctx->ac.builder,
				     LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0),
				     index, "");
	}

	if (image->Register.File != TGSI_FILE_IMAGE) {
		/* Bindless descriptors are accessible from a different pair of
		 * user SGPR indices.
		 */
		rsrc_ptr = LLVMGetParam(ctx->main_fn,
					ctx->param_bindless_samplers_and_images);
		index = lp_build_emit_fetch_src(bld_base, image,
						TGSI_TYPE_UNSIGNED, 0);

		/* For simplicity, bindless image descriptors use fixed
		 * 16-dword slots for now.
		 */
		index = LLVMBuildMul(ctx->ac.builder, index,
				     LLVMConstInt(ctx->i32, 2, 0), "");
	}

	*rsrc = si_load_image_desc(ctx, rsrc_ptr, index,
		target == TGSI_TEXTURE_BUFFER ? AC_DESC_BUFFER : AC_DESC_IMAGE,
		dcc_off);
}

/**
 * Fetch the coordinates for an image instruction and pack them into a
 * single value: a scalar for one coordinate, a vector otherwise.
 *
 * \param src   index of the source operand holding the coordinates
 * \param desc  the image descriptor (read by the GFX9 2D workaround)
 */
static LLVMValueRef image_fetch_coords(
		struct lp_build_tgsi_context *bld_base,
		const struct tgsi_full_instruction *inst,
		unsigned src, LLVMValueRef desc)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	unsigned target = inst->Memory.Texture;
	unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
	LLVMValueRef coords[4];
	LLVMValueRef tmp;
	int chan;

	for (chan = 0; chan < num_coords; ++chan) {
		tmp = lp_build_emit_fetch(bld_base, inst, src, chan);
		tmp = ac_to_integer(&ctx->ac, tmp);
		coords[chan] = tmp;
	}

	if (ctx->screen->info.chip_class >= GFX9) {
		/* 1D textures are allocated and used as 2D on GFX9. */
		if (target == TGSI_TEXTURE_1D) {
			coords[1] = ctx->i32_0;
			num_coords++;
		} else if (target == TGSI_TEXTURE_1D_ARRAY) {
			/* Move the layer up to make room for the zero Y coord. */
			coords[2] = coords[1];
			coords[1] = ctx->i32_0;
			num_coords++;
		} else if (target == TGSI_TEXTURE_2D) {
			/* The hw can't bind a slice of a 3D image as a 2D
			 * image, because it ignores BASE_ARRAY if the target
			 * is 3D. The workaround is to read BASE_ARRAY and set
			 * it as the 3rd address operand for all 2D images.
			 */
			LLVMValueRef first_layer, const5, mask;

			/* BASE_ARRAY lives in dword 5 of the descriptor. */
			const5 = LLVMConstInt(ctx->i32, 5, 0);
			mask = LLVMConstInt(ctx->i32, S_008F24_BASE_ARRAY(~0), 0);
			first_layer = LLVMBuildExtractElement(builder, desc, const5, "");
			first_layer = LLVMBuildAnd(builder, first_layer, mask, "");

			coords[2] = first_layer;
			num_coords++;
		}
	}

	if (num_coords == 1)
		return coords[0];

	if (num_coords == 3) {
		/* LLVM has difficulties lowering 3-element vectors.
		 */
		coords[3] = bld_base->uint_bld.undef;
		num_coords = 4;
	}

	return lp_build_gather_values(&ctx->gallivm, coords, num_coords);
}

/**
 * Append the extra mode bits that are used by image load and store.
 */
static void image_append_args(
		struct si_shader_context *ctx,
		struct lp_build_emit_data * emit_data,
		unsigned target,
		bool atomic,
		bool force_glc)
{
	const struct tgsi_full_instruction *inst = emit_data->inst;
	LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
	LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
	LLVMValueRef r128 = i1false;
	LLVMValueRef da = tgsi_is_array_image(target) ? i1true : i1false;
	/* GLC is set for coherent/volatile accesses, or when forced. */
	LLVMValueRef glc =
		force_glc ||
		inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
		i1true : i1false;
	LLVMValueRef slc = i1false;
	LLVMValueRef lwe = i1false;

	if (atomic || (HAVE_LLVM <= 0x0309)) {
		/* Old intrinsic signature: r128, da, [glc (non-atomic only),] slc. */
		emit_data->args[emit_data->arg_count++] = r128;
		emit_data->args[emit_data->arg_count++] = da;
		if (!atomic) {
			emit_data->args[emit_data->arg_count++] = glc;
		}
		emit_data->args[emit_data->arg_count++] = slc;
		return;
	}

	/* HAVE_LLVM >= 0x0400 */
	emit_data->args[emit_data->arg_count++] = glc;
	emit_data->args[emit_data->arg_count++] = slc;
	emit_data->args[emit_data->arg_count++] = lwe;
	emit_data->args[emit_data->arg_count++] = da;
}

/**
 * Append the resource and indexing arguments for buffer intrinsics.
 *
 * \param rsrc the v4i32 buffer resource
 * \param index index into the buffer (stride-based)
 * \param offset byte offset into the buffer
 */
static void buffer_append_args(
		struct si_shader_context *ctx,
		struct lp_build_emit_data *emit_data,
		LLVMValueRef rsrc,
		LLVMValueRef index,
		LLVMValueRef offset,
		bool atomic,
		bool force_glc)
{
	const struct tgsi_full_instruction *inst = emit_data->inst;
	LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
	LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);

	emit_data->args[emit_data->arg_count++] = rsrc;
	emit_data->args[emit_data->arg_count++] = index; /* vindex */
	emit_data->args[emit_data->arg_count++] = offset; /* voffset */
	if (!atomic) {
		/* GLC is set for coherent/volatile accesses, or when forced. */
		emit_data->args[emit_data->arg_count++] =
			force_glc ||
			inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
			i1true : i1false; /* glc */
	}
	emit_data->args[emit_data->arg_count++] = i1false; /* slc */
}

/**
 * Gather the intrinsic arguments for a LOAD instruction from a shader
 * buffer, constant buffer, image, or buffer image.
 */
static void load_fetch_args(
		struct lp_build_tgsi_context * bld_base,
		struct lp_build_emit_data * emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	const struct tgsi_full_instruction * inst = emit_data->inst;
	unsigned target = inst->Memory.Texture;
	LLVMValueRef rsrc;

	emit_data->dst_type = ctx->v4f32;

	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
	    inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) {
		LLVMValueRef offset;
		LLVMValueRef tmp;

		bool ubo = inst->Src[0].Register.File == TGSI_FILE_CONSTBUF;
		rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], ubo);

		/* src1 holds the byte offset into the buffer. */
		tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
		offset = ac_to_integer(&ctx->ac, tmp);

		buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
				   offset, false, false);
	} else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
		   tgsi_is_bindless_image_file(inst->Src[0].Register.File)) {
		LLVMValueRef coords;

		image_fetch_rsrc(bld_base, &inst->Src[0], false, target, &rsrc);
		coords = image_fetch_coords(bld_base, inst, 1, rsrc);

		if (target == TGSI_TEXTURE_BUFFER) {
			buffer_append_args(ctx, emit_data, rsrc, coords,
					   ctx->i32_0, false, false);
		} else {
			emit_data->args[0] = coords;
			emit_data->args[1] = rsrc;
			emit_data->args[2] = LLVMConstInt(ctx->i32, 15, 0); /* dmask */
			emit_data->arg_count = 3;

			image_append_args(ctx, emit_data, target, false, false);
		}
	}
}

/**
 * Emit a load from a shader buffer or constant buffer using the args
 * prepared by load_fetch_args.
 */
static void load_emit_buffer(struct si_shader_context *ctx,
			     struct lp_build_emit_data *emit_data,
			     bool can_speculate, bool allow_smem)
{
	const struct tgsi_full_instruction *inst = emit_data->inst;
	uint writemask = inst->Dst[0].Register.WriteMask;
	/* Load only up to the highest enabled channel. */
	uint count = util_last_bit(writemask);
	LLVMValueRef *args = emit_data->args;

	/* Don't use SMEM for shader buffer loads, because LLVM doesn't
	 * select SMEM for SI.load.const with a non-constant offset, and
	 * constant offsets practically don't exist with shader buffers.
	 *
	 * Also, SI.load.const doesn't use inst_offset when it's lowered
	 * to VMEM, so we just end up with more VALU instructions in the end
	 * and no benefit.
	 *
	 * TODO: Remove this line once LLVM can select SMEM with a non-constant
	 * offset, and can derive inst_offset when VMEM is selected.
	 * After that, si_memory_barrier should invalidate sL1 for shader
	 * buffers.
	 */

	assert(LLVMConstIntGetZExtValue(args[1]) == 0); /* vindex */
	emit_data->output[emit_data->chan] =
		ac_build_buffer_load(&ctx->ac, args[0], count, NULL,
				     args[2], NULL, 0,
				     LLVMConstIntGetZExtValue(args[3]),
				     LLVMConstIntGetZExtValue(args[4]),
				     can_speculate, allow_smem);
}

/**
 * Return a pointer of the given element \p type into LDS, addressed by
 * source operand \p arg of \p inst.
 */
static LLVMValueRef get_memory_ptr(struct si_shader_context *ctx,
				   const struct tgsi_full_instruction *inst,
				   LLVMTypeRef type, int arg)
{
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef offset, ptr;
	int addr_space;

	offset = lp_build_emit_fetch(&ctx->bld_base, inst, arg, 0);
	offset = ac_to_integer(&ctx->ac, offset);

	ptr = ctx->ac.lds;
	ptr = LLVMBuildGEP(builder, ptr, &offset, 1, "");
	/* Keep the LDS address space when retyping the pointer. */
	addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
	ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, addr_space), "");

	return ptr;
}

/**
 * Emit a LOAD from shared memory (TGSI_FILE_MEMORY): load each enabled
 * channel separately and gather the results into a vec4.
 */
static void load_emit_memory(
		struct si_shader_context *ctx,
		struct lp_build_emit_data *emit_data)
{
	const struct tgsi_full_instruction *inst = emit_data->inst;
	unsigned writemask = inst->Dst[0].Register.WriteMask;
	LLVMValueRef channels[4], ptr, derived_ptr, index;
	int chan;

	ptr = get_memory_ptr(ctx, inst, ctx->f32, 1);

	for (chan = 0; chan < 4; ++chan) {
		if (!(writemask & (1 << chan))) {
			/* Channels not written stay undefined. */
			channels[chan] = LLVMGetUndef(ctx->f32);
			continue;
		}

		index = LLVMConstInt(ctx->i32, chan, 0);
		derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
		channels[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, "");
	}
	emit_data->output[emit_data->chan] = lp_build_gather_values(&ctx->gallivm, channels, 4);
}

/**
 * Return true if the memory accessed by a LOAD or STORE instruction is
 * read-only or write-only, respectively.
 *
 * \param shader_buffers_reverse_access_mask
 *        For LOAD, set this to (store | atomic) slot usage in the shader.
 *        For STORE, set this to (load | atomic) slot usage in the shader.
 * \param images_reverse_access_mask  Same as above, but for images.
 */
static bool is_oneway_access_only(const struct tgsi_full_instruction *inst,
				  const struct tgsi_shader_info *info,
				  unsigned shader_buffers_reverse_access_mask,
				  unsigned images_reverse_access_mask)
{
	/* RESTRICT means NOALIAS.
	 * If there are no writes, we can assume the accessed memory is read-only.
	 * If there are no reads, we can assume the accessed memory is write-only.
	 */
	if (inst->Memory.Qualifier & TGSI_MEMORY_RESTRICT) {
		unsigned reverse_access_mask;

		if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
			reverse_access_mask = shader_buffers_reverse_access_mask;
		} else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
			/* Buffer image: only buffer-image slots matter. */
			reverse_access_mask = info->images_buffers &
					      images_reverse_access_mask;
		} else {
			/* Non-buffer image: exclude buffer-image slots. */
			reverse_access_mask = ~info->images_buffers &
					      images_reverse_access_mask;
		}

		if (inst->Src[0].Register.Indirect) {
			/* Indirect index: any slot could be accessed. */
			if (!reverse_access_mask)
				return true;
		} else {
			if (!(reverse_access_mask &
			      (1u << inst->Src[0].Register.Index)))
				return true;
		}
	}

	/* If there are no buffer writes (for both shader buffers & image
	 * buffers), it implies that buffer memory is read-only.
	 * If there are no buffer reads (for both shader buffers & image
	 * buffers), it implies that buffer memory is write-only.
	 *
	 * Same for the case when there are no writes/reads for non-buffer
	 * images.
	 */
	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
	    (inst->Memory.Texture == TGSI_TEXTURE_BUFFER &&
	     (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
	      tgsi_is_bindless_image_file(inst->Src[0].Register.File)))) {
		if (!shader_buffers_reverse_access_mask &&
		    !(info->images_buffers & images_reverse_access_mask))
			return true;
	} else {
		if (!(~info->images_buffers & images_reverse_access_mask))
			return true;
	}
	return false;
}

/**
 * Emit a LOAD instruction: shared memory, constant buffers, shader
 * buffers, buffer images, and regular images are all handled here.
 */
static void load_emit(
		const struct lp_build_tgsi_action *action,
		struct lp_build_tgsi_context *bld_base,
		struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	const struct tgsi_full_instruction * inst = emit_data->inst;
	const struct tgsi_shader_info *info = &ctx->shader->selector->info;
	char intrinsic_name[64];
	bool can_speculate = false;

	if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
		load_emit_memory(ctx, emit_data);
		return;
	}

	if (inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) {
		/* Constant buffers are read-only: speculation and SMEM are OK. */
		load_emit_buffer(ctx, emit_data, true, true);
		return;
	}

	/* Wait for outstanding VMEM accesses before a volatile load. */
	if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
		ac_build_waitcnt(&ctx->ac, VM_CNT);

	/* A load may be speculated (hoisted) if the memory is read-only
	 * for this shader and the access is not volatile. */
	can_speculate = !(inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) &&
			  is_oneway_access_only(inst, info,
						info->shader_buffers_store |
						info->shader_buffers_atomic,
						info->images_store |
						info->images_atomic);

	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
		load_emit_buffer(ctx, emit_data, can_speculate, false);
		return;
	}

	if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
		emit_data->output[emit_data->chan] =
			lp_build_intrinsic(
				builder, "llvm.amdgcn.buffer.load.format.v4f32", emit_data->dst_type,
				emit_data->args, emit_data->arg_count,
				ac_get_load_intr_attribs(can_speculate));
	} else {
		/* Build the typed image.load intrinsic name from the
		 * argument types. */
		ac_get_image_intr_name("llvm.amdgcn.image.load",
				       emit_data->dst_type, /* vdata */
				       LLVMTypeOf(emit_data->args[0]), /* coords */
				       LLVMTypeOf(emit_data->args[1]), /* rsrc */
				       intrinsic_name, sizeof(intrinsic_name));

		emit_data->output[emit_data->chan] =
			lp_build_intrinsic(
				builder, intrinsic_name, emit_data->dst_type,
				emit_data->args, emit_data->arg_count,
				ac_get_load_intr_attribs(can_speculate));
	}
}

/**
 * Gather the intrinsic arguments for a STORE instruction: the value to
 * store (src1) plus the destination resource/addressing arguments.
 */
static void store_fetch_args(
		struct lp_build_tgsi_context * bld_base,
		struct lp_build_emit_data * emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	const struct tgsi_full_instruction * inst = emit_data->inst;
	struct tgsi_full_src_register memory;
	LLVMValueRef chans[4];
	LLVMValueRef data;
	LLVMValueRef rsrc;
	unsigned chan;

	emit_data->dst_type = ctx->voidt;

	/* Always fetch all 4 channels of the value to store. */
	for (chan = 0; chan < 4; ++chan) {
		chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan);
	}
	data = lp_build_gather_values(&ctx->gallivm, chans, 4);

	emit_data->args[emit_data->arg_count++] = data;

	/* Re-interpret the destination register as a source register. */
	memory = tgsi_full_src_register_from_dst(&inst->Dst[0]);

	if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
		LLVMValueRef offset;
		LLVMValueRef tmp;

		rsrc = shader_buffer_fetch_rsrc(ctx, &memory, false);

		/* src0 holds the byte offset into the buffer. */
		tmp = lp_build_emit_fetch(bld_base, inst, 0, 0);
		offset = ac_to_integer(&ctx->ac, tmp);

		buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
				   offset, false, false);
	} else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE ||
		   tgsi_is_bindless_image_file(inst->Dst[0].Register.File)) {
		unsigned target = inst->Memory.Texture;
		LLVMValueRef coords;

		/* 8bit/16bit TC L1 write corruption bug on SI.
		 * All store opcodes not aligned to a dword are affected.
		 *
		 * The only way to get unaligned stores in radeonsi is through
		 * shader images.
		 */
		bool force_glc = ctx->screen->info.chip_class == SI;

		image_fetch_rsrc(bld_base, &memory, true, target, &rsrc);
		coords = image_fetch_coords(bld_base, inst, 0, rsrc);

		if (target == TGSI_TEXTURE_BUFFER) {
			buffer_append_args(ctx, emit_data, rsrc, coords,
					   ctx->i32_0, false, force_glc);
		} else {
			/* args[0] is already the store value (vdata). */
			emit_data->args[1] = coords;
			emit_data->args[2] = rsrc;
			emit_data->args[3] = LLVMConstInt(ctx->i32, 15, 0); /* dmask */
			emit_data->arg_count = 4;

			image_append_args(ctx, emit_data, target, false, force_glc);
		}
	}
}

/**
 * Emit a store to a shader buffer, splitting the write into one
 * intrinsic per consecutive run of enabled writemask channels.
 */
static void store_emit_buffer(
		struct si_shader_context *ctx,
		struct lp_build_emit_data *emit_data,
		bool writeonly_memory)
{
	const struct tgsi_full_instruction *inst = emit_data->inst;
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef base_data = emit_data->args[0];
	LLVMValueRef base_offset = emit_data->args[3];
	unsigned writemask = inst->Dst[0].Register.WriteMask;

	while (writemask) {
		int start, count;
		const char *intrinsic_name;
		LLVMValueRef data;
		LLVMValueRef offset;
		LLVMValueRef tmp;

		u_bit_scan_consecutive_range(&writemask, &start, &count);

		/* Due to an LLVM limitation, split 3-element writes
		 * into a 2-element and a 1-element write.
		 */
		if (count == 3) {
			/* Requeue the 3rd channel; store the first 2 now. */
			writemask |= 1 << (start + 2);
			count = 2;
		}

		if (count == 4) {
			data = base_data;
			intrinsic_name = "llvm.amdgcn.buffer.store.v4f32";
		} else if (count == 2) {
			LLVMTypeRef v2f32 = LLVMVectorType(ctx->f32, 2);

			/* Pack the two channels into a v2f32. */
			tmp = LLVMBuildExtractElement(
				builder, base_data,
				LLVMConstInt(ctx->i32, start, 0), "");
			data = LLVMBuildInsertElement(
				builder, LLVMGetUndef(v2f32), tmp,
				ctx->i32_0, "");

			tmp = LLVMBuildExtractElement(
				builder, base_data,
				LLVMConstInt(ctx->i32, start + 1, 0), "");
			data = LLVMBuildInsertElement(
				builder, data, tmp, ctx->i32_1, "");

			intrinsic_name = "llvm.amdgcn.buffer.store.v2f32";
		} else {
			assert(count == 1);
			data = LLVMBuildExtractElement(
				builder, base_data,
				LLVMConstInt(ctx->i32, start, 0), "");
			intrinsic_name = "llvm.amdgcn.buffer.store.f32";
		}

		offset = base_offset;
		if (start != 0) {
			/* Advance the byte offset to the first stored channel. */
			offset = LLVMBuildAdd(
				builder, offset,
				LLVMConstInt(ctx->i32, start * 4, 0), "");
		}

		emit_data->args[0] = data;
		emit_data->args[3] = offset;

		lp_build_intrinsic(
			builder, intrinsic_name, emit_data->dst_type,
			emit_data->args, emit_data->arg_count,
			ac_get_store_intr_attribs(writeonly_memory));
	}
}

/**
 * Emit a STORE to shared memory (TGSI_FILE_MEMORY), one channel at a
 * time according to the destination writemask.
 */
static void store_emit_memory(
		struct si_shader_context *ctx,
		struct lp_build_emit_data *emit_data)
{
	const struct tgsi_full_instruction *inst = emit_data->inst;
	LLVMBuilderRef builder = ctx->ac.builder;
	unsigned writemask = inst->Dst[0].Register.WriteMask;
	LLVMValueRef ptr, derived_ptr, data, index;
	int chan;

	ptr = get_memory_ptr(ctx, inst, ctx->f32, 0);

	for (chan = 0; chan < 4; ++chan) {
		if (!(writemask & (1 << chan))) {
			continue;
		}
		data = lp_build_emit_fetch(&ctx->bld_base, inst, 1, chan);
		index = LLVMConstInt(ctx->i32, chan, 0);
		derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
		LLVMBuildStore(builder, data, derived_ptr);
	}
}

/**
 * Emit a STORE instruction: shared memory, shader buffers, buffer
 * images, and regular images are all handled here.
 */
static void store_emit(
		const struct lp_build_tgsi_action *action,
		struct lp_build_tgsi_context *bld_base,
		struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	const struct tgsi_full_instruction * inst = emit_data->inst;
	const struct tgsi_shader_info *info = &ctx->shader->selector->info;
	unsigned target = inst->Memory.Texture;
	char intrinsic_name[64];
	bool writeonly_memory = false;

	if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
		store_emit_memory(ctx, emit_data);
		return;
	}

	/* Wait for outstanding VMEM accesses before a volatile store. */
	if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
		ac_build_waitcnt(&ctx->ac, VM_CNT);

	/* For STORE, the "reverse" access masks are the load/atomic masks. */
	writeonly_memory = is_oneway_access_only(inst, info,
						 info->shader_buffers_load |
						 info->shader_buffers_atomic,
						 info->images_load |
						 info->images_atomic);

	if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
		store_emit_buffer(ctx, emit_data, writeonly_memory);
		return;
	}

	if (target == TGSI_TEXTURE_BUFFER) {
		emit_data->output[emit_data->chan] = lp_build_intrinsic(
			builder, "llvm.amdgcn.buffer.store.format.v4f32",
			emit_data->dst_type, emit_data->args,
			emit_data->arg_count,
			ac_get_store_intr_attribs(writeonly_memory));
	} else {
		/* Build the typed image.store intrinsic name from the
		 * argument types. */
		ac_get_image_intr_name("llvm.amdgcn.image.store",
				       LLVMTypeOf(emit_data->args[0]), /* vdata */
				       LLVMTypeOf(emit_data->args[1]), /* coords */
				       LLVMTypeOf(emit_data->args[2]), /* rsrc */
				       intrinsic_name, sizeof(intrinsic_name));

		emit_data->output[emit_data->chan] =
			lp_build_intrinsic(
				builder, intrinsic_name, emit_data->dst_type,
				emit_data->args, emit_data->arg_count,
				ac_get_store_intr_attribs(writeonly_memory));
	}
}

/**
 * Gather the intrinsic arguments for an atomic memory operation.
 */
static void atomic_fetch_args(
		struct lp_build_tgsi_context * bld_base,
		struct lp_build_emit_data * emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	const struct tgsi_full_instruction * inst = emit_data->inst;
	LLVMValueRef data1, data2;
	LLVMValueRef rsrc;
	LLVMValueRef tmp;

	emit_data->dst_type = ctx->f32;

	/* src2 is the operand; src3 is the comparand for ATOMCAS. */
	tmp = lp_build_emit_fetch(bld_base, inst, 2, 0);
	data1 = ac_to_integer(&ctx->ac, tmp);

	if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
		tmp = lp_build_emit_fetch(bld_base, inst, 3, 0);
		data2 = ac_to_integer(&ctx->ac, tmp);
	}

	/* llvm.amdgcn.image/buffer.atomic.cmpswap reflect the hardware order
	 * of arguments, which is reversed relative to TGSI (and GLSL)
	 */
	if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
		emit_data->args[emit_data->arg_count++] = data2;
	emit_data->args[emit_data->arg_count++] = data1;

	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
		LLVMValueRef offset;

		rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false);

		/* src1 holds the byte offset into the buffer. */
		tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
		offset = ac_to_integer(&ctx->ac, tmp);

		buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
				   offset, true, false);
	} else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
		   tgsi_is_bindless_image_file(inst->Src[0].Register.File)) {
		unsigned target = inst->Memory.Texture;
		LLVMValueRef coords;

		/* Atomics count as stores here (is_store = true → DCC off). */
		image_fetch_rsrc(bld_base, &inst->Src[0], true, target, &rsrc);
		coords = image_fetch_coords(bld_base, inst, 1, rsrc);

		if (target == TGSI_TEXTURE_BUFFER) {
			buffer_append_args(ctx, emit_data, rsrc, coords,
					   ctx->i32_0, true, false);
		} else {
			emit_data->args[emit_data->arg_count++] = coords;
			emit_data->args[emit_data->arg_count++] = rsrc;

			image_append_args(ctx, emit_data, target, true, false);
		}
	}
}

/**
 * Emit an atomic operation on shared (LDS) memory using native LLVM
 * atomic instructions.
 */
static void atomic_emit_memory(struct si_shader_context *ctx,
			       struct lp_build_emit_data *emit_data) {
	LLVMBuilderRef builder = ctx->ac.builder;
	const
struct tgsi_full_instruction * inst = emit_data->inst; 862 LLVMValueRef ptr, result, arg; 863 864 ptr = get_memory_ptr(ctx, inst, ctx->i32, 1); 865 866 arg = lp_build_emit_fetch(&ctx->bld_base, inst, 2, 0); 867 arg = ac_to_integer(&ctx->ac, arg); 868 869 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { 870 LLVMValueRef new_data; 871 new_data = lp_build_emit_fetch(&ctx->bld_base, 872 inst, 3, 0); 873 874 new_data = ac_to_integer(&ctx->ac, new_data); 875 876 result = LLVMBuildAtomicCmpXchg(builder, ptr, arg, new_data, 877 LLVMAtomicOrderingSequentiallyConsistent, 878 LLVMAtomicOrderingSequentiallyConsistent, 879 false); 880 881 result = LLVMBuildExtractValue(builder, result, 0, ""); 882 } else { 883 LLVMAtomicRMWBinOp op; 884 885 switch(inst->Instruction.Opcode) { 886 case TGSI_OPCODE_ATOMUADD: 887 op = LLVMAtomicRMWBinOpAdd; 888 break; 889 case TGSI_OPCODE_ATOMXCHG: 890 op = LLVMAtomicRMWBinOpXchg; 891 break; 892 case TGSI_OPCODE_ATOMAND: 893 op = LLVMAtomicRMWBinOpAnd; 894 break; 895 case TGSI_OPCODE_ATOMOR: 896 op = LLVMAtomicRMWBinOpOr; 897 break; 898 case TGSI_OPCODE_ATOMXOR: 899 op = LLVMAtomicRMWBinOpXor; 900 break; 901 case TGSI_OPCODE_ATOMUMIN: 902 op = LLVMAtomicRMWBinOpUMin; 903 break; 904 case TGSI_OPCODE_ATOMUMAX: 905 op = LLVMAtomicRMWBinOpUMax; 906 break; 907 case TGSI_OPCODE_ATOMIMIN: 908 op = LLVMAtomicRMWBinOpMin; 909 break; 910 case TGSI_OPCODE_ATOMIMAX: 911 op = LLVMAtomicRMWBinOpMax; 912 break; 913 default: 914 unreachable("unknown atomic opcode"); 915 } 916 917 result = LLVMBuildAtomicRMW(builder, op, ptr, arg, 918 LLVMAtomicOrderingSequentiallyConsistent, 919 false); 920 } 921 emit_data->output[emit_data->chan] = LLVMBuildBitCast(builder, result, emit_data->dst_type, ""); 922 } 923 924 static void atomic_emit( 925 const struct lp_build_tgsi_action *action, 926 struct lp_build_tgsi_context *bld_base, 927 struct lp_build_emit_data *emit_data) 928 { 929 struct si_shader_context *ctx = si_shader_context(bld_base); 930 LLVMBuilderRef builder = 
ctx->ac.builder; 931 const struct tgsi_full_instruction * inst = emit_data->inst; 932 char intrinsic_name[40]; 933 LLVMValueRef tmp; 934 935 if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) { 936 atomic_emit_memory(ctx, emit_data); 937 return; 938 } 939 940 if (inst->Src[0].Register.File == TGSI_FILE_BUFFER || 941 inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { 942 snprintf(intrinsic_name, sizeof(intrinsic_name), 943 "llvm.amdgcn.buffer.atomic.%s", action->intr_name); 944 } else { 945 LLVMValueRef coords; 946 char coords_type[8]; 947 948 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) 949 coords = emit_data->args[2]; 950 else 951 coords = emit_data->args[1]; 952 953 ac_build_type_name_for_intr(LLVMTypeOf(coords), coords_type, sizeof(coords_type)); 954 snprintf(intrinsic_name, sizeof(intrinsic_name), 955 "llvm.amdgcn.image.atomic.%s.%s", 956 action->intr_name, coords_type); 957 } 958 959 tmp = lp_build_intrinsic( 960 builder, intrinsic_name, ctx->i32, 961 emit_data->args, emit_data->arg_count, 0); 962 emit_data->output[emit_data->chan] = ac_to_float(&ctx->ac, tmp); 963 } 964 965 static void set_tex_fetch_args(struct si_shader_context *ctx, 966 struct lp_build_emit_data *emit_data, 967 unsigned target, 968 LLVMValueRef res_ptr, LLVMValueRef samp_ptr, 969 LLVMValueRef *param, unsigned count, 970 unsigned dmask) 971 { 972 struct ac_image_args args = {}; 973 974 /* Pad to power of two vector */ 975 while (count < util_next_power_of_two(count)) 976 param[count++] = LLVMGetUndef(ctx->i32); 977 978 if (count > 1) 979 args.addr = lp_build_gather_values(&ctx->gallivm, param, count); 980 else 981 args.addr = param[0]; 982 983 args.resource = res_ptr; 984 args.sampler = samp_ptr; 985 args.dmask = dmask; 986 args.unorm = target == TGSI_TEXTURE_RECT || 987 target == TGSI_TEXTURE_SHADOWRECT; 988 args.da = tgsi_is_array_sampler(target); 989 990 /* Ugly, but we seem to have no other choice right now. 
*/ 991 STATIC_ASSERT(sizeof(args) <= sizeof(emit_data->args)); 992 memcpy(emit_data->args, &args, sizeof(args)); 993 } 994 995 static LLVMValueRef fix_resinfo(struct si_shader_context *ctx, 996 unsigned target, LLVMValueRef out) 997 { 998 LLVMBuilderRef builder = ctx->ac.builder; 999 1000 /* 1D textures are allocated and used as 2D on GFX9. */ 1001 if (ctx->screen->info.chip_class >= GFX9 && 1002 (target == TGSI_TEXTURE_1D_ARRAY || 1003 target == TGSI_TEXTURE_SHADOW1D_ARRAY)) { 1004 LLVMValueRef layers = 1005 LLVMBuildExtractElement(builder, out, 1006 LLVMConstInt(ctx->i32, 2, 0), ""); 1007 out = LLVMBuildInsertElement(builder, out, layers, 1008 ctx->i32_1, ""); 1009 } 1010 1011 /* Divide the number of layers by 6 to get the number of cubes. */ 1012 if (target == TGSI_TEXTURE_CUBE_ARRAY || 1013 target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 1014 LLVMValueRef imm2 = LLVMConstInt(ctx->i32, 2, 0); 1015 1016 LLVMValueRef z = LLVMBuildExtractElement(builder, out, imm2, ""); 1017 z = LLVMBuildSDiv(builder, z, LLVMConstInt(ctx->i32, 6, 0), ""); 1018 1019 out = LLVMBuildInsertElement(builder, out, z, imm2, ""); 1020 } 1021 return out; 1022 } 1023 1024 static void resq_fetch_args( 1025 struct lp_build_tgsi_context * bld_base, 1026 struct lp_build_emit_data * emit_data) 1027 { 1028 struct si_shader_context *ctx = si_shader_context(bld_base); 1029 const struct tgsi_full_instruction *inst = emit_data->inst; 1030 const struct tgsi_full_src_register *reg = &inst->Src[0]; 1031 1032 emit_data->dst_type = ctx->v4i32; 1033 1034 if (reg->Register.File == TGSI_FILE_BUFFER) { 1035 emit_data->args[0] = shader_buffer_fetch_rsrc(ctx, reg, false); 1036 emit_data->arg_count = 1; 1037 } else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { 1038 image_fetch_rsrc(bld_base, reg, false, inst->Memory.Texture, 1039 &emit_data->args[0]); 1040 emit_data->arg_count = 1; 1041 } else { 1042 LLVMValueRef res_ptr; 1043 unsigned image_target; 1044 1045 if (inst->Memory.Texture == TGSI_TEXTURE_3D) 1046 
image_target = TGSI_TEXTURE_2D_ARRAY; 1047 else 1048 image_target = inst->Memory.Texture; 1049 1050 image_fetch_rsrc(bld_base, reg, false, inst->Memory.Texture, 1051 &res_ptr); 1052 set_tex_fetch_args(ctx, emit_data, image_target, 1053 res_ptr, NULL, &ctx->i32_0, 1, 1054 0xf); 1055 } 1056 } 1057 1058 static void resq_emit( 1059 const struct lp_build_tgsi_action *action, 1060 struct lp_build_tgsi_context *bld_base, 1061 struct lp_build_emit_data *emit_data) 1062 { 1063 struct si_shader_context *ctx = si_shader_context(bld_base); 1064 LLVMBuilderRef builder = ctx->ac.builder; 1065 const struct tgsi_full_instruction *inst = emit_data->inst; 1066 LLVMValueRef out; 1067 1068 if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { 1069 out = LLVMBuildExtractElement(builder, emit_data->args[0], 1070 LLVMConstInt(ctx->i32, 2, 0), ""); 1071 } else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { 1072 out = get_buffer_size(bld_base, emit_data->args[0]); 1073 } else { 1074 struct ac_image_args args; 1075 1076 memcpy(&args, emit_data->args, sizeof(args)); /* ugly */ 1077 args.opcode = ac_image_get_resinfo; 1078 out = ac_build_image_opcode(&ctx->ac, &args); 1079 1080 out = fix_resinfo(ctx, inst->Memory.Texture, out); 1081 } 1082 1083 emit_data->output[emit_data->chan] = out; 1084 } 1085 1086 /** 1087 * Load an image view, fmask view. or sampler state descriptor. 1088 */ 1089 LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, 1090 LLVMValueRef list, LLVMValueRef index, 1091 enum ac_descriptor_type type) 1092 { 1093 LLVMBuilderRef builder = ctx->ac.builder; 1094 1095 switch (type) { 1096 case AC_DESC_IMAGE: 1097 /* The image is at [0:7]. */ 1098 index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), ""); 1099 break; 1100 case AC_DESC_BUFFER: 1101 /* The buffer is in [4:7]. 
*/ 1102 index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), ""); 1103 index = LLVMBuildAdd(builder, index, ctx->i32_1, ""); 1104 list = LLVMBuildPointerCast(builder, list, 1105 si_const_array(ctx->v4i32, 0), ""); 1106 break; 1107 case AC_DESC_FMASK: 1108 /* The FMASK is at [8:15]. */ 1109 index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), ""); 1110 index = LLVMBuildAdd(builder, index, ctx->i32_1, ""); 1111 break; 1112 case AC_DESC_SAMPLER: 1113 /* The sampler state is at [12:15]. */ 1114 index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), ""); 1115 index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 3, 0), ""); 1116 list = LLVMBuildPointerCast(builder, list, 1117 si_const_array(ctx->v4i32, 0), ""); 1118 break; 1119 } 1120 1121 return ac_build_load_to_sgpr(&ctx->ac, list, index); 1122 } 1123 1124 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL. 1125 * 1126 * SI-CI: 1127 * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic 1128 * filtering manually. The driver sets img7 to a mask clearing 1129 * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do: 1130 * s_and_b32 samp0, samp0, img7 1131 * 1132 * VI: 1133 * The ANISO_OVERRIDE sampler field enables this fix in TA. 
1134 */ 1135 static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx, 1136 LLVMValueRef res, LLVMValueRef samp) 1137 { 1138 LLVMValueRef img7, samp0; 1139 1140 if (ctx->screen->info.chip_class >= VI) 1141 return samp; 1142 1143 img7 = LLVMBuildExtractElement(ctx->ac.builder, res, 1144 LLVMConstInt(ctx->i32, 7, 0), ""); 1145 samp0 = LLVMBuildExtractElement(ctx->ac.builder, samp, 1146 ctx->i32_0, ""); 1147 samp0 = LLVMBuildAnd(ctx->ac.builder, samp0, img7, ""); 1148 return LLVMBuildInsertElement(ctx->ac.builder, samp, samp0, 1149 ctx->i32_0, ""); 1150 } 1151 1152 static void tex_fetch_ptrs( 1153 struct lp_build_tgsi_context *bld_base, 1154 struct lp_build_emit_data *emit_data, 1155 LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr) 1156 { 1157 struct si_shader_context *ctx = si_shader_context(bld_base); 1158 LLVMValueRef list = LLVMGetParam(ctx->main_fn, ctx->param_samplers_and_images); 1159 const struct tgsi_full_instruction *inst = emit_data->inst; 1160 const struct tgsi_full_src_register *reg; 1161 unsigned target = inst->Texture.Texture; 1162 unsigned sampler_src; 1163 LLVMValueRef index; 1164 1165 sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1; 1166 reg = &emit_data->inst->Src[sampler_src]; 1167 1168 if (reg->Register.Indirect) { 1169 index = si_get_bounded_indirect_index(ctx, 1170 ®->Indirect, 1171 reg->Register.Index, 1172 ctx->num_samplers); 1173 index = LLVMBuildAdd(ctx->ac.builder, index, 1174 LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), ""); 1175 } else { 1176 index = LLVMConstInt(ctx->i32, 1177 si_get_sampler_slot(reg->Register.Index), 0); 1178 } 1179 1180 if (reg->Register.File != TGSI_FILE_SAMPLER) { 1181 /* Bindless descriptors are accessible from a different pair of 1182 * user SGPR indices. 
1183 */ 1184 list = LLVMGetParam(ctx->main_fn, 1185 ctx->param_bindless_samplers_and_images); 1186 index = lp_build_emit_fetch_src(bld_base, reg, 1187 TGSI_TYPE_UNSIGNED, 0); 1188 } 1189 1190 if (target == TGSI_TEXTURE_BUFFER) 1191 *res_ptr = si_load_sampler_desc(ctx, list, index, AC_DESC_BUFFER); 1192 else 1193 *res_ptr = si_load_sampler_desc(ctx, list, index, AC_DESC_IMAGE); 1194 1195 if (samp_ptr) 1196 *samp_ptr = NULL; 1197 if (fmask_ptr) 1198 *fmask_ptr = NULL; 1199 1200 if (target == TGSI_TEXTURE_2D_MSAA || 1201 target == TGSI_TEXTURE_2D_ARRAY_MSAA) { 1202 if (fmask_ptr) 1203 *fmask_ptr = si_load_sampler_desc(ctx, list, index, 1204 AC_DESC_FMASK); 1205 } else if (target != TGSI_TEXTURE_BUFFER) { 1206 if (samp_ptr) { 1207 *samp_ptr = si_load_sampler_desc(ctx, list, index, 1208 AC_DESC_SAMPLER); 1209 *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr); 1210 } 1211 } 1212 } 1213 1214 static void txq_fetch_args( 1215 struct lp_build_tgsi_context *bld_base, 1216 struct lp_build_emit_data *emit_data) 1217 { 1218 struct si_shader_context *ctx = si_shader_context(bld_base); 1219 const struct tgsi_full_instruction *inst = emit_data->inst; 1220 unsigned target = inst->Texture.Texture; 1221 LLVMValueRef res_ptr; 1222 LLVMValueRef address; 1223 1224 tex_fetch_ptrs(bld_base, emit_data, &res_ptr, NULL, NULL); 1225 1226 if (target == TGSI_TEXTURE_BUFFER) { 1227 /* Read the size from the buffer descriptor directly. */ 1228 emit_data->args[0] = get_buffer_size(bld_base, res_ptr); 1229 return; 1230 } 1231 1232 /* Textures - set the mip level. 
*/ 1233 address = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X); 1234 1235 set_tex_fetch_args(ctx, emit_data, target, res_ptr, 1236 NULL, &address, 1, 0xf); 1237 } 1238 1239 static void txq_emit(const struct lp_build_tgsi_action *action, 1240 struct lp_build_tgsi_context *bld_base, 1241 struct lp_build_emit_data *emit_data) 1242 { 1243 struct si_shader_context *ctx = si_shader_context(bld_base); 1244 struct ac_image_args args; 1245 unsigned target = emit_data->inst->Texture.Texture; 1246 1247 if (target == TGSI_TEXTURE_BUFFER) { 1248 /* Just return the buffer size. */ 1249 emit_data->output[emit_data->chan] = emit_data->args[0]; 1250 return; 1251 } 1252 1253 memcpy(&args, emit_data->args, sizeof(args)); /* ugly */ 1254 1255 args.opcode = ac_image_get_resinfo; 1256 LLVMValueRef result = ac_build_image_opcode(&ctx->ac, &args); 1257 1258 emit_data->output[emit_data->chan] = fix_resinfo(ctx, target, result); 1259 } 1260 1261 static void tex_fetch_args( 1262 struct lp_build_tgsi_context *bld_base, 1263 struct lp_build_emit_data *emit_data) 1264 { 1265 struct si_shader_context *ctx = si_shader_context(bld_base); 1266 const struct tgsi_full_instruction *inst = emit_data->inst; 1267 unsigned opcode = inst->Instruction.Opcode; 1268 unsigned target = inst->Texture.Texture; 1269 LLVMValueRef coords[5], derivs[6]; 1270 LLVMValueRef address[16]; 1271 unsigned num_coords = tgsi_util_get_texture_coord_dim(target); 1272 int ref_pos = tgsi_util_get_shadow_ref_src_index(target); 1273 unsigned count = 0; 1274 unsigned chan; 1275 unsigned num_deriv_channels = 0; 1276 bool has_offset = inst->Texture.NumOffsets > 0; 1277 LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL; 1278 unsigned dmask = 0xf; 1279 1280 tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr); 1281 1282 if (target == TGSI_TEXTURE_BUFFER) { 1283 emit_data->dst_type = ctx->v4f32; 1284 emit_data->args[0] = res_ptr; 1285 emit_data->args[1] = ctx->i32_0; 1286 emit_data->args[2] = 
lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X); 1287 emit_data->arg_count = 3; 1288 return; 1289 } 1290 1291 /* Fetch and project texture coordinates */ 1292 coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); 1293 for (chan = 0; chan < 3; chan++) { 1294 coords[chan] = lp_build_emit_fetch(bld_base, 1295 emit_data->inst, 0, 1296 chan); 1297 if (opcode == TGSI_OPCODE_TXP) 1298 coords[chan] = lp_build_emit_llvm_binary(bld_base, 1299 TGSI_OPCODE_DIV, 1300 coords[chan], 1301 coords[3]); 1302 } 1303 1304 if (opcode == TGSI_OPCODE_TXP) 1305 coords[3] = ctx->ac.f32_1; 1306 1307 /* Pack offsets. */ 1308 if (has_offset && 1309 opcode != TGSI_OPCODE_TXF && 1310 opcode != TGSI_OPCODE_TXF_LZ) { 1311 /* The offsets are six-bit signed integers packed like this: 1312 * X=[5:0], Y=[13:8], and Z=[21:16]. 1313 */ 1314 LLVMValueRef offset[3], pack; 1315 1316 assert(inst->Texture.NumOffsets == 1); 1317 1318 for (chan = 0; chan < 3; chan++) { 1319 offset[chan] = lp_build_emit_fetch_texoffset(bld_base, 1320 emit_data->inst, 0, chan); 1321 offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan], 1322 LLVMConstInt(ctx->i32, 0x3f, 0), ""); 1323 if (chan) 1324 offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan], 1325 LLVMConstInt(ctx->i32, chan*8, 0), ""); 1326 } 1327 1328 pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], ""); 1329 pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], ""); 1330 address[count++] = pack; 1331 } 1332 1333 /* Pack LOD bias value */ 1334 if (opcode == TGSI_OPCODE_TXB) 1335 address[count++] = coords[3]; 1336 if (opcode == TGSI_OPCODE_TXB2) 1337 address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); 1338 1339 /* Pack depth comparison value */ 1340 if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) { 1341 LLVMValueRef z; 1342 1343 if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 1344 z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); 1345 } else { 1346 assert(ref_pos >= 0); 
1347 z = coords[ref_pos]; 1348 } 1349 1350 /* Section 8.23.1 (Depth Texture Comparison Mode) of the 1351 * OpenGL 4.5 spec says: 1352 * 1353 * "If the textures internal format indicates a fixed-point 1354 * depth texture, then D_t and D_ref are clamped to the 1355 * range [0, 1]; otherwise no clamping is performed." 1356 * 1357 * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT, 1358 * so the depth comparison value isn't clamped for Z16 and 1359 * Z24 anymore. Do it manually here. 1360 */ 1361 if (ctx->screen->info.chip_class >= VI) { 1362 LLVMValueRef upgraded; 1363 LLVMValueRef clamped; 1364 upgraded = LLVMBuildExtractElement(ctx->ac.builder, samp_ptr, 1365 LLVMConstInt(ctx->i32, 3, false), ""); 1366 upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded, 1367 LLVMConstInt(ctx->i32, 29, false), ""); 1368 upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->i1, ""); 1369 clamped = ac_build_clamp(&ctx->ac, z); 1370 z = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped, z, ""); 1371 } 1372 1373 address[count++] = z; 1374 } 1375 1376 /* Pack user derivatives */ 1377 if (opcode == TGSI_OPCODE_TXD) { 1378 int param, num_src_deriv_channels, num_dst_deriv_channels; 1379 1380 switch (target) { 1381 case TGSI_TEXTURE_3D: 1382 num_src_deriv_channels = 3; 1383 num_dst_deriv_channels = 3; 1384 num_deriv_channels = 3; 1385 break; 1386 case TGSI_TEXTURE_2D: 1387 case TGSI_TEXTURE_SHADOW2D: 1388 case TGSI_TEXTURE_RECT: 1389 case TGSI_TEXTURE_SHADOWRECT: 1390 case TGSI_TEXTURE_2D_ARRAY: 1391 case TGSI_TEXTURE_SHADOW2D_ARRAY: 1392 num_src_deriv_channels = 2; 1393 num_dst_deriv_channels = 2; 1394 num_deriv_channels = 2; 1395 break; 1396 case TGSI_TEXTURE_CUBE: 1397 case TGSI_TEXTURE_SHADOWCUBE: 1398 case TGSI_TEXTURE_CUBE_ARRAY: 1399 case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 1400 /* Cube derivatives will be converted to 2D. 
*/ 1401 num_src_deriv_channels = 3; 1402 num_dst_deriv_channels = 3; 1403 num_deriv_channels = 2; 1404 break; 1405 case TGSI_TEXTURE_1D: 1406 case TGSI_TEXTURE_SHADOW1D: 1407 case TGSI_TEXTURE_1D_ARRAY: 1408 case TGSI_TEXTURE_SHADOW1D_ARRAY: 1409 num_src_deriv_channels = 1; 1410 1411 /* 1D textures are allocated and used as 2D on GFX9. */ 1412 if (ctx->screen->info.chip_class >= GFX9) { 1413 num_dst_deriv_channels = 2; 1414 num_deriv_channels = 2; 1415 } else { 1416 num_dst_deriv_channels = 1; 1417 num_deriv_channels = 1; 1418 } 1419 break; 1420 default: 1421 unreachable("invalid target"); 1422 } 1423 1424 for (param = 0; param < 2; param++) { 1425 for (chan = 0; chan < num_src_deriv_channels; chan++) 1426 derivs[param * num_dst_deriv_channels + chan] = 1427 lp_build_emit_fetch(bld_base, inst, param+1, chan); 1428 1429 /* Fill in the rest with zeros. */ 1430 for (chan = num_src_deriv_channels; 1431 chan < num_dst_deriv_channels; chan++) 1432 derivs[param * num_dst_deriv_channels + chan] = 1433 ctx->ac.f32_0; 1434 } 1435 } 1436 1437 if (target == TGSI_TEXTURE_CUBE || 1438 target == TGSI_TEXTURE_CUBE_ARRAY || 1439 target == TGSI_TEXTURE_SHADOWCUBE || 1440 target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 1441 ac_prepare_cube_coords(&ctx->ac, 1442 opcode == TGSI_OPCODE_TXD, 1443 target == TGSI_TEXTURE_CUBE_ARRAY || 1444 target == TGSI_TEXTURE_SHADOWCUBE_ARRAY, 1445 opcode == TGSI_OPCODE_LODQ, 1446 coords, derivs); 1447 } else if (tgsi_is_array_sampler(target) && 1448 opcode != TGSI_OPCODE_TXF && 1449 opcode != TGSI_OPCODE_TXF_LZ && 1450 ctx->screen->info.chip_class <= VI) { 1451 unsigned array_coord = target == TGSI_TEXTURE_1D_ARRAY ? 
1 : 2; 1452 coords[array_coord] = 1453 ac_build_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, 1454 &coords[array_coord], 1, 0); 1455 } 1456 1457 if (opcode == TGSI_OPCODE_TXD) 1458 for (int i = 0; i < num_deriv_channels * 2; i++) 1459 address[count++] = derivs[i]; 1460 1461 /* Pack texture coordinates */ 1462 address[count++] = coords[0]; 1463 if (num_coords > 1) 1464 address[count++] = coords[1]; 1465 if (num_coords > 2) 1466 address[count++] = coords[2]; 1467 1468 /* 1D textures are allocated and used as 2D on GFX9. */ 1469 if (ctx->screen->info.chip_class >= GFX9) { 1470 LLVMValueRef filler; 1471 1472 /* Use 0.5, so that we don't sample the border color. */ 1473 if (opcode == TGSI_OPCODE_TXF || 1474 opcode == TGSI_OPCODE_TXF_LZ) 1475 filler = ctx->i32_0; 1476 else 1477 filler = LLVMConstReal(ctx->f32, 0.5); 1478 1479 if (target == TGSI_TEXTURE_1D || 1480 target == TGSI_TEXTURE_SHADOW1D) { 1481 address[count++] = filler; 1482 } else if (target == TGSI_TEXTURE_1D_ARRAY || 1483 target == TGSI_TEXTURE_SHADOW1D_ARRAY) { 1484 address[count] = address[count - 1]; 1485 address[count - 1] = filler; 1486 count++; 1487 } 1488 } 1489 1490 /* Pack LOD or sample index */ 1491 if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXF) 1492 address[count++] = coords[3]; 1493 else if (opcode == TGSI_OPCODE_TXL2) 1494 address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); 1495 1496 if (count > 16) { 1497 assert(!"Cannot handle more than 16 texture address parameters"); 1498 count = 16; 1499 } 1500 1501 for (chan = 0; chan < count; chan++) 1502 address[chan] = ac_to_integer(&ctx->ac, address[chan]); 1503 1504 /* Adjust the sample index according to FMASK. 1505 * 1506 * For uncompressed MSAA surfaces, FMASK should return 0x76543210, 1507 * which is the identity mapping. Each nibble says which physical sample 1508 * should be fetched to get that sample. 
1509 * 1510 * For example, 0x11111100 means there are only 2 samples stored and 1511 * the second sample covers 3/4 of the pixel. When reading samples 0 1512 * and 1, return physical sample 0 (determined by the first two 0s 1513 * in FMASK), otherwise return physical sample 1. 1514 * 1515 * The sample index should be adjusted as follows: 1516 * sample_index = (fmask >> (sample_index * 4)) & 0xF; 1517 */ 1518 if (target == TGSI_TEXTURE_2D_MSAA || 1519 target == TGSI_TEXTURE_2D_ARRAY_MSAA) { 1520 struct lp_build_emit_data txf_emit_data = *emit_data; 1521 LLVMValueRef txf_address[4]; 1522 /* We only need .xy for non-arrays, and .xyz for arrays. */ 1523 unsigned txf_count = target == TGSI_TEXTURE_2D_MSAA ? 2 : 3; 1524 struct tgsi_full_instruction inst = {}; 1525 1526 memcpy(txf_address, address, sizeof(txf_address)); 1527 1528 /* Read FMASK using TXF_LZ. */ 1529 inst.Instruction.Opcode = TGSI_OPCODE_TXF_LZ; 1530 inst.Texture.Texture = target; 1531 txf_emit_data.inst = &inst; 1532 txf_emit_data.chan = 0; 1533 set_tex_fetch_args(ctx, &txf_emit_data, 1534 target, fmask_ptr, NULL, 1535 txf_address, txf_count, 0xf); 1536 build_tex_intrinsic(&tex_action, bld_base, &txf_emit_data); 1537 1538 /* Initialize some constants. */ 1539 LLVMValueRef four = LLVMConstInt(ctx->i32, 4, 0); 1540 LLVMValueRef F = LLVMConstInt(ctx->i32, 0xF, 0); 1541 1542 /* Apply the formula. 
*/ 1543 LLVMValueRef fmask = 1544 LLVMBuildExtractElement(ctx->ac.builder, 1545 txf_emit_data.output[0], 1546 ctx->i32_0, ""); 1547 1548 unsigned sample_chan = txf_count; /* the sample index is last */ 1549 1550 LLVMValueRef sample_index4 = 1551 LLVMBuildMul(ctx->ac.builder, address[sample_chan], four, ""); 1552 1553 LLVMValueRef shifted_fmask = 1554 LLVMBuildLShr(ctx->ac.builder, fmask, sample_index4, ""); 1555 1556 LLVMValueRef final_sample = 1557 LLVMBuildAnd(ctx->ac.builder, shifted_fmask, F, ""); 1558 1559 /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK 1560 * resource descriptor is 0 (invalid), 1561 */ 1562 LLVMValueRef fmask_desc = 1563 LLVMBuildBitCast(ctx->ac.builder, fmask_ptr, 1564 ctx->v8i32, ""); 1565 1566 LLVMValueRef fmask_word1 = 1567 LLVMBuildExtractElement(ctx->ac.builder, fmask_desc, 1568 ctx->i32_1, ""); 1569 1570 LLVMValueRef word1_is_nonzero = 1571 LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, 1572 fmask_word1, ctx->i32_0, ""); 1573 1574 /* Replace the MSAA sample index. 
*/ 1575 address[sample_chan] = 1576 LLVMBuildSelect(ctx->ac.builder, word1_is_nonzero, 1577 final_sample, address[sample_chan], ""); 1578 } 1579 1580 if (opcode == TGSI_OPCODE_TXF || 1581 opcode == TGSI_OPCODE_TXF_LZ) { 1582 /* add tex offsets */ 1583 if (inst->Texture.NumOffsets) { 1584 struct lp_build_context *uint_bld = &bld_base->uint_bld; 1585 const struct tgsi_texture_offset *off = inst->TexOffsets; 1586 1587 assert(inst->Texture.NumOffsets == 1); 1588 1589 switch (target) { 1590 case TGSI_TEXTURE_3D: 1591 address[2] = lp_build_add(uint_bld, address[2], 1592 ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleZ]); 1593 /* fall through */ 1594 case TGSI_TEXTURE_2D: 1595 case TGSI_TEXTURE_SHADOW2D: 1596 case TGSI_TEXTURE_RECT: 1597 case TGSI_TEXTURE_SHADOWRECT: 1598 case TGSI_TEXTURE_2D_ARRAY: 1599 case TGSI_TEXTURE_SHADOW2D_ARRAY: 1600 address[1] = 1601 lp_build_add(uint_bld, address[1], 1602 ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleY]); 1603 /* fall through */ 1604 case TGSI_TEXTURE_1D: 1605 case TGSI_TEXTURE_SHADOW1D: 1606 case TGSI_TEXTURE_1D_ARRAY: 1607 case TGSI_TEXTURE_SHADOW1D_ARRAY: 1608 address[0] = 1609 lp_build_add(uint_bld, address[0], 1610 ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleX]); 1611 break; 1612 /* texture offsets do not apply to other texture targets */ 1613 } 1614 } 1615 } 1616 1617 if (opcode == TGSI_OPCODE_TG4) { 1618 unsigned gather_comp = 0; 1619 1620 /* DMASK was repurposed for GATHER4. 4 components are always 1621 * returned and DMASK works like a swizzle - it selects 1622 * the component to fetch. The only valid DMASK values are 1623 * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 1624 * (red,red,red,red) etc.) The ISA document doesn't mention 1625 * this. 1626 */ 1627 1628 /* Get the component index from src1.x for Gather4. 
*/ 1629 if (!tgsi_is_shadow_target(target)) { 1630 LLVMValueRef comp_imm; 1631 struct tgsi_src_register src1 = inst->Src[1].Register; 1632 1633 assert(src1.File == TGSI_FILE_IMMEDIATE); 1634 1635 comp_imm = ctx->imms[src1.Index * TGSI_NUM_CHANNELS + src1.SwizzleX]; 1636 gather_comp = LLVMConstIntGetZExtValue(comp_imm); 1637 gather_comp = CLAMP(gather_comp, 0, 3); 1638 } 1639 1640 dmask = 1 << gather_comp; 1641 } 1642 1643 set_tex_fetch_args(ctx, emit_data, target, res_ptr, 1644 samp_ptr, address, count, dmask); 1645 } 1646 1647 /* Gather4 should follow the same rules as bilinear filtering, but the hardware 1648 * incorrectly forces nearest filtering if the texture format is integer. 1649 * The only effect it has on Gather4, which always returns 4 texels for 1650 * bilinear filtering, is that the final coordinates are off by 0.5 of 1651 * the texel size. 1652 * 1653 * The workaround is to subtract 0.5 from the unnormalized coordinates, 1654 * or (0.5 / size) from the normalized coordinates. 1655 * 1656 * However, cube textures with 8_8_8_8 data formats require a different 1657 * workaround of overriding the num format to USCALED/SSCALED. This would lose 1658 * precision in 32-bit data formats, so it needs to be applied dynamically at 1659 * runtime. In this case, return an i1 value that indicates whether the 1660 * descriptor was overridden (and hence a fixup of the sampler result is needed). 1661 */ 1662 static LLVMValueRef 1663 si_lower_gather4_integer(struct si_shader_context *ctx, 1664 struct ac_image_args *args, 1665 unsigned target, 1666 enum tgsi_return_type return_type) 1667 { 1668 LLVMBuilderRef builder = ctx->ac.builder; 1669 LLVMValueRef wa_8888 = NULL; 1670 LLVMValueRef coord = args->addr; 1671 LLVMValueRef half_texel[2]; 1672 /* Texture coordinates start after: 1673 * {offset, bias, z-compare, derivatives} 1674 * Only the offset and z-compare can occur here. 
1675 */ 1676 unsigned coord_vgpr_index = (int)args->offset + (int)args->compare; 1677 int c; 1678 1679 assert(return_type == TGSI_RETURN_TYPE_SINT || 1680 return_type == TGSI_RETURN_TYPE_UINT); 1681 1682 if (target == TGSI_TEXTURE_CUBE || 1683 target == TGSI_TEXTURE_CUBE_ARRAY) { 1684 LLVMValueRef formats; 1685 LLVMValueRef data_format; 1686 LLVMValueRef wa_formats; 1687 1688 formats = LLVMBuildExtractElement(builder, args->resource, ctx->i32_1, ""); 1689 1690 data_format = LLVMBuildLShr(builder, formats, 1691 LLVMConstInt(ctx->i32, 20, false), ""); 1692 data_format = LLVMBuildAnd(builder, data_format, 1693 LLVMConstInt(ctx->i32, (1u << 6) - 1, false), ""); 1694 wa_8888 = LLVMBuildICmp( 1695 builder, LLVMIntEQ, data_format, 1696 LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), 1697 ""); 1698 1699 uint32_t wa_num_format = 1700 return_type == TGSI_RETURN_TYPE_UINT ? 1701 S_008F14_NUM_FORMAT_GFX6(V_008F14_IMG_NUM_FORMAT_USCALED) : 1702 S_008F14_NUM_FORMAT_GFX6(V_008F14_IMG_NUM_FORMAT_SSCALED); 1703 wa_formats = LLVMBuildAnd(builder, formats, 1704 LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false), 1705 ""); 1706 wa_formats = LLVMBuildOr(builder, wa_formats, 1707 LLVMConstInt(ctx->i32, wa_num_format, false), ""); 1708 1709 formats = LLVMBuildSelect(builder, wa_8888, wa_formats, formats, ""); 1710 args->resource = LLVMBuildInsertElement( 1711 builder, args->resource, formats, ctx->i32_1, ""); 1712 } 1713 1714 if (target == TGSI_TEXTURE_RECT || 1715 target == TGSI_TEXTURE_SHADOWRECT) { 1716 assert(!wa_8888); 1717 half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5); 1718 } else { 1719 struct tgsi_full_instruction txq_inst = {}; 1720 struct lp_build_emit_data txq_emit_data = {}; 1721 struct lp_build_if_state if_ctx; 1722 1723 if (wa_8888) { 1724 /* Skip the texture size query entirely if we don't need it. */ 1725 lp_build_if(&if_ctx, &ctx->gallivm, LLVMBuildNot(builder, wa_8888, "")); 1726 } 1727 1728 /* Query the texture size. 
*/ 1729 txq_inst.Texture.Texture = target; 1730 txq_emit_data.inst = &txq_inst; 1731 txq_emit_data.dst_type = ctx->v4i32; 1732 set_tex_fetch_args(ctx, &txq_emit_data, target, 1733 args->resource, NULL, &ctx->i32_0, 1734 1, 0xf); 1735 txq_emit(NULL, &ctx->bld_base, &txq_emit_data); 1736 1737 /* Compute -0.5 / size. */ 1738 for (c = 0; c < 2; c++) { 1739 half_texel[c] = 1740 LLVMBuildExtractElement(builder, txq_emit_data.output[0], 1741 LLVMConstInt(ctx->i32, c, 0), ""); 1742 half_texel[c] = LLVMBuildUIToFP(builder, half_texel[c], ctx->f32, ""); 1743 half_texel[c] = 1744 lp_build_emit_llvm_unary(&ctx->bld_base, 1745 TGSI_OPCODE_RCP, half_texel[c]); 1746 half_texel[c] = LLVMBuildFMul(builder, half_texel[c], 1747 LLVMConstReal(ctx->f32, -0.5), ""); 1748 } 1749 1750 if (wa_8888) { 1751 lp_build_endif(&if_ctx); 1752 1753 LLVMBasicBlockRef bb[2] = { if_ctx.true_block, if_ctx.entry_block }; 1754 1755 for (c = 0; c < 2; c++) { 1756 LLVMValueRef values[2] = { half_texel[c], ctx->ac.f32_0 }; 1757 half_texel[c] = ac_build_phi(&ctx->ac, ctx->f32, 2, 1758 values, bb); 1759 } 1760 } 1761 } 1762 1763 for (c = 0; c < 2; c++) { 1764 LLVMValueRef tmp; 1765 LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0); 1766 1767 tmp = LLVMBuildExtractElement(builder, coord, index, ""); 1768 tmp = ac_to_float(&ctx->ac, tmp); 1769 tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], ""); 1770 tmp = ac_to_integer(&ctx->ac, tmp); 1771 coord = LLVMBuildInsertElement(builder, coord, tmp, index, ""); 1772 } 1773 1774 args->addr = coord; 1775 1776 return wa_8888; 1777 } 1778 1779 /* The second half of the cube texture 8_8_8_8 integer workaround: adjust the 1780 * result after the gather operation. 
1781 */ 1782 static LLVMValueRef 1783 si_fix_gather4_integer_result(struct si_shader_context *ctx, 1784 LLVMValueRef result, 1785 enum tgsi_return_type return_type, 1786 LLVMValueRef wa) 1787 { 1788 LLVMBuilderRef builder = ctx->ac.builder; 1789 1790 assert(return_type == TGSI_RETURN_TYPE_SINT || 1791 return_type == TGSI_RETURN_TYPE_UINT); 1792 1793 for (unsigned chan = 0; chan < 4; ++chan) { 1794 LLVMValueRef chanv = LLVMConstInt(ctx->i32, chan, false); 1795 LLVMValueRef value; 1796 LLVMValueRef wa_value; 1797 1798 value = LLVMBuildExtractElement(builder, result, chanv, ""); 1799 1800 if (return_type == TGSI_RETURN_TYPE_UINT) 1801 wa_value = LLVMBuildFPToUI(builder, value, ctx->i32, ""); 1802 else 1803 wa_value = LLVMBuildFPToSI(builder, value, ctx->i32, ""); 1804 wa_value = ac_to_float(&ctx->ac, wa_value); 1805 value = LLVMBuildSelect(builder, wa, wa_value, value, ""); 1806 1807 result = LLVMBuildInsertElement(builder, result, value, chanv, ""); 1808 } 1809 1810 return result; 1811 } 1812 1813 static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, 1814 struct lp_build_tgsi_context *bld_base, 1815 struct lp_build_emit_data *emit_data) 1816 { 1817 struct si_shader_context *ctx = si_shader_context(bld_base); 1818 const struct tgsi_full_instruction *inst = emit_data->inst; 1819 struct ac_image_args args; 1820 unsigned opcode = inst->Instruction.Opcode; 1821 unsigned target = inst->Texture.Texture; 1822 1823 if (target == TGSI_TEXTURE_BUFFER) { 1824 emit_data->output[emit_data->chan] = 1825 ac_build_buffer_load_format(&ctx->ac, 1826 emit_data->args[0], 1827 emit_data->args[2], 1828 emit_data->args[1], 1829 true); 1830 return; 1831 } 1832 1833 memcpy(&args, emit_data->args, sizeof(args)); /* ugly */ 1834 1835 args.opcode = ac_image_sample; 1836 args.compare = tgsi_is_shadow_target(target); 1837 args.offset = inst->Texture.NumOffsets > 0; 1838 1839 switch (opcode) { 1840 case TGSI_OPCODE_TXF: 1841 case TGSI_OPCODE_TXF_LZ: 1842 args.opcode = opcode == 
TGSI_OPCODE_TXF_LZ || 1843 target == TGSI_TEXTURE_2D_MSAA || 1844 target == TGSI_TEXTURE_2D_ARRAY_MSAA ? 1845 ac_image_load : ac_image_load_mip; 1846 args.compare = false; 1847 args.offset = false; 1848 break; 1849 case TGSI_OPCODE_LODQ: 1850 args.opcode = ac_image_get_lod; 1851 args.compare = false; 1852 args.offset = false; 1853 break; 1854 case TGSI_OPCODE_TEX: 1855 case TGSI_OPCODE_TEX2: 1856 case TGSI_OPCODE_TXP: 1857 if (ctx->type != PIPE_SHADER_FRAGMENT) 1858 args.level_zero = true; 1859 break; 1860 case TGSI_OPCODE_TEX_LZ: 1861 args.level_zero = true; 1862 break; 1863 case TGSI_OPCODE_TXB: 1864 case TGSI_OPCODE_TXB2: 1865 assert(ctx->type == PIPE_SHADER_FRAGMENT); 1866 args.bias = true; 1867 break; 1868 case TGSI_OPCODE_TXL: 1869 case TGSI_OPCODE_TXL2: 1870 args.lod = true; 1871 break; 1872 case TGSI_OPCODE_TXD: 1873 args.deriv = true; 1874 break; 1875 case TGSI_OPCODE_TG4: 1876 args.opcode = ac_image_gather4; 1877 args.level_zero = true; 1878 break; 1879 default: 1880 assert(0); 1881 return; 1882 } 1883 1884 /* The hardware needs special lowering for Gather4 with integer formats. 
*/ 1885 LLVMValueRef gather4_int_result_workaround = NULL; 1886 1887 if (ctx->screen->info.chip_class <= VI && 1888 opcode == TGSI_OPCODE_TG4) { 1889 assert(inst->Texture.ReturnType != TGSI_RETURN_TYPE_UNKNOWN); 1890 1891 if (inst->Texture.ReturnType == TGSI_RETURN_TYPE_SINT || 1892 inst->Texture.ReturnType == TGSI_RETURN_TYPE_UINT) { 1893 gather4_int_result_workaround = 1894 si_lower_gather4_integer(ctx, &args, target, 1895 inst->Texture.ReturnType); 1896 } 1897 } 1898 1899 LLVMValueRef result = 1900 ac_build_image_opcode(&ctx->ac, &args); 1901 1902 if (gather4_int_result_workaround) { 1903 result = si_fix_gather4_integer_result(ctx, result, 1904 inst->Texture.ReturnType, 1905 gather4_int_result_workaround); 1906 } 1907 1908 emit_data->output[emit_data->chan] = result; 1909 } 1910 1911 static void si_llvm_emit_txqs( 1912 const struct lp_build_tgsi_action *action, 1913 struct lp_build_tgsi_context *bld_base, 1914 struct lp_build_emit_data *emit_data) 1915 { 1916 struct si_shader_context *ctx = si_shader_context(bld_base); 1917 LLVMValueRef res, samples; 1918 LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL; 1919 1920 tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr); 1921 1922 1923 /* Read the samples from the descriptor directly. 
*/ 1924 res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->v8i32, ""); 1925 samples = LLVMBuildExtractElement(ctx->ac.builder, res, 1926 LLVMConstInt(ctx->i32, 3, 0), ""); 1927 samples = LLVMBuildLShr(ctx->ac.builder, samples, 1928 LLVMConstInt(ctx->i32, 16, 0), ""); 1929 samples = LLVMBuildAnd(ctx->ac.builder, samples, 1930 LLVMConstInt(ctx->i32, 0xf, 0), ""); 1931 samples = LLVMBuildShl(ctx->ac.builder, ctx->i32_1, 1932 samples, ""); 1933 1934 emit_data->output[emit_data->chan] = samples; 1935 } 1936 1937 static const struct lp_build_tgsi_action tex_action = { 1938 .fetch_args = tex_fetch_args, 1939 .emit = build_tex_intrinsic, 1940 }; 1941 1942 /** 1943 * Setup actions for TGSI memory opcode, including texture opcodes. 1944 */ 1945 void si_shader_context_init_mem(struct si_shader_context *ctx) 1946 { 1947 struct lp_build_tgsi_context *bld_base; 1948 struct lp_build_tgsi_action tmpl = {}; 1949 1950 bld_base = &ctx->bld_base; 1951 1952 bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action; 1953 bld_base->op_actions[TGSI_OPCODE_TEX_LZ] = tex_action; 1954 bld_base->op_actions[TGSI_OPCODE_TEX2] = tex_action; 1955 bld_base->op_actions[TGSI_OPCODE_TXB] = tex_action; 1956 bld_base->op_actions[TGSI_OPCODE_TXB2] = tex_action; 1957 bld_base->op_actions[TGSI_OPCODE_TXD] = tex_action; 1958 bld_base->op_actions[TGSI_OPCODE_TXF] = tex_action; 1959 bld_base->op_actions[TGSI_OPCODE_TXF_LZ] = tex_action; 1960 bld_base->op_actions[TGSI_OPCODE_TXL] = tex_action; 1961 bld_base->op_actions[TGSI_OPCODE_TXL2] = tex_action; 1962 bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action; 1963 bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = txq_fetch_args; 1964 bld_base->op_actions[TGSI_OPCODE_TXQ].emit = txq_emit; 1965 bld_base->op_actions[TGSI_OPCODE_TG4] = tex_action; 1966 bld_base->op_actions[TGSI_OPCODE_LODQ] = tex_action; 1967 bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs; 1968 1969 bld_base->op_actions[TGSI_OPCODE_LOAD].fetch_args = load_fetch_args; 1970 
bld_base->op_actions[TGSI_OPCODE_LOAD].emit = load_emit; 1971 bld_base->op_actions[TGSI_OPCODE_STORE].fetch_args = store_fetch_args; 1972 bld_base->op_actions[TGSI_OPCODE_STORE].emit = store_emit; 1973 bld_base->op_actions[TGSI_OPCODE_RESQ].fetch_args = resq_fetch_args; 1974 bld_base->op_actions[TGSI_OPCODE_RESQ].emit = resq_emit; 1975 1976 tmpl.fetch_args = atomic_fetch_args; 1977 tmpl.emit = atomic_emit; 1978 bld_base->op_actions[TGSI_OPCODE_ATOMUADD] = tmpl; 1979 bld_base->op_actions[TGSI_OPCODE_ATOMUADD].intr_name = "add"; 1980 bld_base->op_actions[TGSI_OPCODE_ATOMXCHG] = tmpl; 1981 bld_base->op_actions[TGSI_OPCODE_ATOMXCHG].intr_name = "swap"; 1982 bld_base->op_actions[TGSI_OPCODE_ATOMCAS] = tmpl; 1983 bld_base->op_actions[TGSI_OPCODE_ATOMCAS].intr_name = "cmpswap"; 1984 bld_base->op_actions[TGSI_OPCODE_ATOMAND] = tmpl; 1985 bld_base->op_actions[TGSI_OPCODE_ATOMAND].intr_name = "and"; 1986 bld_base->op_actions[TGSI_OPCODE_ATOMOR] = tmpl; 1987 bld_base->op_actions[TGSI_OPCODE_ATOMOR].intr_name = "or"; 1988 bld_base->op_actions[TGSI_OPCODE_ATOMXOR] = tmpl; 1989 bld_base->op_actions[TGSI_OPCODE_ATOMXOR].intr_name = "xor"; 1990 bld_base->op_actions[TGSI_OPCODE_ATOMUMIN] = tmpl; 1991 bld_base->op_actions[TGSI_OPCODE_ATOMUMIN].intr_name = "umin"; 1992 bld_base->op_actions[TGSI_OPCODE_ATOMUMAX] = tmpl; 1993 bld_base->op_actions[TGSI_OPCODE_ATOMUMAX].intr_name = "umax"; 1994 bld_base->op_actions[TGSI_OPCODE_ATOMIMIN] = tmpl; 1995 bld_base->op_actions[TGSI_OPCODE_ATOMIMIN].intr_name = "smin"; 1996 bld_base->op_actions[TGSI_OPCODE_ATOMIMAX] = tmpl; 1997 bld_base->op_actions[TGSI_OPCODE_ATOMIMAX].intr_name = "smax"; 1998 } 1999