1 /* 2 * Copyright 2016 Bas Nieuwenhuizen 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #include "ac_nir_to_llvm.h" 25 #include "ac_llvm_build.h" 26 #include "ac_llvm_util.h" 27 #include "ac_binary.h" 28 #include "sid.h" 29 #include "nir/nir.h" 30 #include "../vulkan/radv_descriptor_set.h" 31 #include "util/bitscan.h" 32 #include <llvm-c/Transforms/Scalar.h> 33 #include "ac_shader_abi.h" 34 #include "ac_shader_info.h" 35 #include "ac_shader_util.h" 36 #include "ac_exp_param.h" 37 38 enum radeon_llvm_calling_convention { 39 RADEON_LLVM_AMDGPU_VS = 87, 40 RADEON_LLVM_AMDGPU_GS = 88, 41 RADEON_LLVM_AMDGPU_PS = 89, 42 RADEON_LLVM_AMDGPU_CS = 90, 43 RADEON_LLVM_AMDGPU_HS = 93, 44 }; 45 46 #define CONST_ADDR_SPACE 2 47 #define LOCAL_ADDR_SPACE 3 48 49 #define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1) 50 #define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1) 51 52 struct nir_to_llvm_context; 53 54 struct ac_nir_context { 55 struct ac_llvm_context ac; 56 struct ac_shader_abi *abi; 57 58 gl_shader_stage stage; 59 60 struct hash_table *defs; 61 struct hash_table *phis; 62 struct hash_table *vars; 63 64 LLVMValueRef main_function; 65 LLVMBasicBlockRef continue_block; 66 LLVMBasicBlockRef break_block; 67 68 LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4]; 69 70 int num_locals; 71 LLVMValueRef *locals; 72 73 struct nir_to_llvm_context *nctx; /* TODO get rid of this */ 74 }; 75 76 struct nir_to_llvm_context { 77 struct ac_llvm_context ac; 78 const struct ac_nir_compiler_options *options; 79 struct ac_shader_variant_info *shader_info; 80 struct ac_shader_abi abi; 81 struct ac_nir_context *nir; 82 83 unsigned max_workgroup_size; 84 LLVMContextRef context; 85 LLVMModuleRef module; 86 LLVMBuilderRef builder; 87 LLVMValueRef main_function; 88 89 struct hash_table *defs; 90 struct hash_table *phis; 91 92 LLVMValueRef descriptor_sets[AC_UD_MAX_SETS]; 93 LLVMValueRef ring_offsets; 94 LLVMValueRef push_constants; 95 LLVMValueRef view_index; 96 LLVMValueRef num_work_groups; 97 LLVMValueRef workgroup_ids[3]; 98 LLVMValueRef local_invocation_ids; 99 LLVMValueRef tg_size; 100 101 LLVMValueRef vertex_buffers; 102 LLVMValueRef rel_auto_id; 103 LLVMValueRef vs_prim_id; 104 LLVMValueRef ls_out_layout; 105 LLVMValueRef es2gs_offset; 106 107 LLVMValueRef tcs_offchip_layout; 108 LLVMValueRef tcs_out_offsets; 109 LLVMValueRef tcs_out_layout; 110 LLVMValueRef tcs_in_layout; 111 LLVMValueRef oc_lds; 112 LLVMValueRef merged_wave_info; 113 LLVMValueRef tess_factor_offset; 114 LLVMValueRef tes_rel_patch_id; 115 LLVMValueRef tes_u; 116 LLVMValueRef tes_v; 117 118 LLVMValueRef gsvs_ring_stride; 119 LLVMValueRef gsvs_num_entries; 120 LLVMValueRef gs2vs_offset; 121 LLVMValueRef gs_wave_id; 122 LLVMValueRef gs_vtx_offset[6]; 123 124 LLVMValueRef esgs_ring; 125 LLVMValueRef gsvs_ring; 126 LLVMValueRef hs_ring_tess_offchip; 127 LLVMValueRef hs_ring_tess_factor; 128 129 LLVMValueRef prim_mask; 130 LLVMValueRef sample_pos_offset; 131 LLVMValueRef persp_sample, persp_center, persp_centroid; 132 LLVMValueRef linear_sample, linear_center, linear_centroid; 133 134 gl_shader_stage stage; 135 136 LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4]; 137 138 uint64_t input_mask; 139 uint64_t output_mask; 140 uint8_t num_output_clips; 141 uint8_t num_output_culls; 142 143 bool is_gs_copy_shader; 144 LLVMValueRef gs_next_vertex; 145 unsigned gs_max_out_vertices; 146 147 unsigned tes_primitive_mode; 148 uint64_t tess_outputs_written; 149 uint64_t tess_patch_outputs_written; 150 151 uint32_t tcs_patch_outputs_read; 152 uint64_t tcs_outputs_read; 153 }; 154 155 static inline struct nir_to_llvm_context * 156 nir_to_llvm_context_from_abi(struct ac_shader_abi *abi) 157 { 158 struct nir_to_llvm_context *ctx = NULL; 159 return container_of(abi, ctx, abi); 160 } 161 162 static LLVMTypeRef 163 nir2llvmtype(struct ac_nir_context *ctx, 164 const struct glsl_type *type) 165 { 166 switch (glsl_get_base_type(glsl_without_array(type))) { 167 case GLSL_TYPE_UINT: 168 case GLSL_TYPE_INT: 169 return ctx->ac.i32; 170 case GLSL_TYPE_UINT64: 171 case GLSL_TYPE_INT64: 172 return ctx->ac.i64; 173 case GLSL_TYPE_DOUBLE: 174 return ctx->ac.f64; 175 case GLSL_TYPE_FLOAT: 176 return ctx->ac.f32; 177 default: 178 assert(!"Unsupported type in nir2llvmtype()"); 179 break; 180 } 181 return 0; 182 } 183 184 static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, 185 const nir_deref_var *deref, 186 enum ac_descriptor_type desc_type, 187 const nir_tex_instr *instr, 188 bool image, bool write); 189 190 static unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan) 191 { 192 return (index * 4) + chan; 193 } 194 195 static unsigned shader_io_get_unique_index(gl_varying_slot slot) 196 { 197 /* handle patch indices separate */ 198 if (slot == VARYING_SLOT_TESS_LEVEL_OUTER) 199 return 0; 200 if (slot == VARYING_SLOT_TESS_LEVEL_INNER) 201 return 1; 202 if (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_TESS_MAX) 203 return 2 + (slot - VARYING_SLOT_PATCH0); 204 205 if (slot == VARYING_SLOT_POS) 206 return 0; 207 if (slot == VARYING_SLOT_PSIZ) 208 return 1; 209 if (slot == VARYING_SLOT_CLIP_DIST0) 210 return 2; 211 /* 3 is reserved for clip dist as well */ 212 if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31) 213 return 4 + (slot - VARYING_SLOT_VAR0); 214 unreachable("illegal slot in get unique index\n"); 215 } 216 217 static void set_llvm_calling_convention(LLVMValueRef func, 218 gl_shader_stage stage) 219 { 220 enum radeon_llvm_calling_convention calling_conv; 221 222 switch (stage) { 223 case MESA_SHADER_VERTEX: 224 case MESA_SHADER_TESS_EVAL: 225 calling_conv = RADEON_LLVM_AMDGPU_VS; 226 break; 227 case MESA_SHADER_GEOMETRY: 228 calling_conv = RADEON_LLVM_AMDGPU_GS; 229 break; 230 case MESA_SHADER_TESS_CTRL: 231 calling_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS : RADEON_LLVM_AMDGPU_VS; 232 break; 233 case MESA_SHADER_FRAGMENT: 234 calling_conv = RADEON_LLVM_AMDGPU_PS; 235 break; 236 case MESA_SHADER_COMPUTE: 237 calling_conv = RADEON_LLVM_AMDGPU_CS; 238 break; 239 default: 240 unreachable("Unhandle shader type"); 241 } 242 243 LLVMSetFunctionCallConv(func, calling_conv); 244 } 245 246 #define MAX_ARGS 23 247 struct arg_info { 248 LLVMTypeRef types[MAX_ARGS]; 249 LLVMValueRef *assign[MAX_ARGS]; 250 unsigned array_params_mask; 251 uint8_t count; 252 uint8_t sgpr_count; 253 uint8_t num_sgprs_used; 254 uint8_t num_vgprs_used; 255 }; 256 257 enum ac_arg_regfile { 258 ARG_SGPR, 259 ARG_VGPR, 260 }; 261 262 static void 263 add_arg(struct arg_info *info, enum ac_arg_regfile regfile, LLVMTypeRef type, 264 LLVMValueRef *param_ptr) 265 { 266 assert(info->count < MAX_ARGS); 267 268 info->assign[info->count] = param_ptr; 269 info->types[info->count] = type; 270 info->count++; 271 272 if (regfile == ARG_SGPR) { 273 info->num_sgprs_used += ac_get_type_size(type) / 4; 274 info->sgpr_count++; 275 } else { 276 assert(regfile == ARG_VGPR); 277 info->num_vgprs_used += ac_get_type_size(type) / 4; 278 } 279 } 280 281 static inline void 282 add_array_arg(struct arg_info *info, LLVMTypeRef type, LLVMValueRef *param_ptr) 283 { 284 info->array_params_mask |= (1 << info->count); 285 add_arg(info, ARG_SGPR, type, param_ptr); 286 } 287 288 static void assign_arguments(LLVMValueRef main_function, 289 struct arg_info *info) 290 { 291 unsigned i; 292 for (i = 0; i < info->count; i++) { 293 if (info->assign[i]) 294 *info->assign[i] = LLVMGetParam(main_function, i); 295 } 296 } 297 298 static LLVMValueRef 299 create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module, 300 LLVMBuilderRef builder, LLVMTypeRef *return_types, 301 unsigned num_return_elems, 302 struct arg_info *args, 303 unsigned max_workgroup_size, 304 bool unsafe_math) 305 { 306 LLVMTypeRef main_function_type, ret_type; 307 LLVMBasicBlockRef main_function_body; 308 309 if (num_return_elems) 310 ret_type = LLVMStructTypeInContext(ctx, return_types, 311 num_return_elems, true); 312 else 313 ret_type = LLVMVoidTypeInContext(ctx); 314 315 /* Setup the function */ 316 main_function_type = 317 LLVMFunctionType(ret_type, args->types, args->count, 0); 318 LLVMValueRef main_function = 319 LLVMAddFunction(module, "main", main_function_type); 320 main_function_body = 321 LLVMAppendBasicBlockInContext(ctx, main_function, "main_body"); 322 LLVMPositionBuilderAtEnd(builder, main_function_body); 323 324 LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS); 325 for (unsigned i = 0; i < args->sgpr_count; ++i) { 326 if (args->array_params_mask & (1 << i)) { 327 LLVMValueRef P = LLVMGetParam(main_function, i); 328 ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_BYVAL); 329 ac_add_attr_dereferenceable(P, UINT64_MAX); 330 } 331 else { 332 ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG); 333 } 334 } 335 336 if (max_workgroup_size) { 337 ac_llvm_add_target_dep_function_attr(main_function, 338 "amdgpu-max-work-group-size", 339 max_workgroup_size); 340 } 341 if (unsafe_math) { 342 /* These were copied from some LLVM test. */ 343 LLVMAddTargetDependentFunctionAttr(main_function, 344 "less-precise-fpmad", 345 "true"); 346 LLVMAddTargetDependentFunctionAttr(main_function, 347 "no-infs-fp-math", 348 "true"); 349 LLVMAddTargetDependentFunctionAttr(main_function, 350 "no-nans-fp-math", 351 "true"); 352 LLVMAddTargetDependentFunctionAttr(main_function, 353 "unsafe-fp-math", 354 "true"); 355 LLVMAddTargetDependentFunctionAttr(main_function, 356 "no-signed-zeros-fp-math", 357 "true"); 358 } 359 return main_function; 360 } 361 362 static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements) 363 { 364 return LLVMPointerType(LLVMArrayType(elem_type, num_elements), 365 CONST_ADDR_SPACE); 366 } 367 368 static int get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type) 369 { 370 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) 371 type = LLVMGetElementType(type); 372 373 if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind) 374 return LLVMGetIntTypeWidth(type); 375 376 if (type == ctx->f16) 377 return 16; 378 if (type == ctx->f32) 379 return 32; 380 if (type == ctx->f64) 381 return 64; 382 383 unreachable("Unhandled type kind in get_elem_bits"); 384 } 385 386 static LLVMValueRef unpack_param(struct ac_llvm_context *ctx, 387 LLVMValueRef param, unsigned rshift, 388 unsigned bitwidth) 389 { 390 LLVMValueRef value = param; 391 if (rshift) 392 value = LLVMBuildLShr(ctx->builder, value, 393 LLVMConstInt(ctx->i32, rshift, false), ""); 394 395 if (rshift + bitwidth < 32) { 396 unsigned mask = (1 << bitwidth) - 1; 397 value = LLVMBuildAnd(ctx->builder, value, 398 LLVMConstInt(ctx->i32, mask, false), ""); 399 } 400 return value; 401 } 402 403 static LLVMValueRef get_rel_patch_id(struct nir_to_llvm_context *ctx) 404 { 405 switch (ctx->stage) { 406 case MESA_SHADER_TESS_CTRL: 407 return unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 0, 8); 408 case MESA_SHADER_TESS_EVAL: 409 return ctx->tes_rel_patch_id; 410 break; 411 default: 412 unreachable("Illegal stage"); 413 } 414 } 415 416 /* Tessellation shaders pass outputs to the next shader using LDS. 417 * 418 * LS outputs = TCS inputs 419 * TCS outputs = TES inputs 420 * 421 * The LDS layout is: 422 * - TCS inputs for patch 0 423 * - TCS inputs for patch 1 424 * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2) 425 * - ... 426 * - TCS outputs for patch 0 = get_tcs_out_patch0_offset 427 * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset 428 * - TCS outputs for patch 1 429 * - Per-patch TCS outputs for patch 1 430 * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2) 431 * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2) 432 * - ... 433 * 434 * All three shaders VS(LS), TCS, TES share the same LDS space. 435 */ 436 static LLVMValueRef 437 get_tcs_in_patch_stride(struct nir_to_llvm_context *ctx) 438 { 439 if (ctx->stage == MESA_SHADER_VERTEX) 440 return unpack_param(&ctx->ac, ctx->ls_out_layout, 0, 13); 441 else if (ctx->stage == MESA_SHADER_TESS_CTRL) 442 return unpack_param(&ctx->ac, ctx->tcs_in_layout, 0, 13); 443 else { 444 assert(0); 445 return NULL; 446 } 447 } 448 449 static LLVMValueRef 450 get_tcs_out_patch_stride(struct nir_to_llvm_context *ctx) 451 { 452 return unpack_param(&ctx->ac, ctx->tcs_out_layout, 0, 13); 453 } 454 455 static LLVMValueRef 456 get_tcs_out_patch0_offset(struct nir_to_llvm_context *ctx) 457 { 458 return LLVMBuildMul(ctx->builder, 459 unpack_param(&ctx->ac, ctx->tcs_out_offsets, 0, 16), 460 LLVMConstInt(ctx->ac.i32, 4, false), ""); 461 } 462 463 static LLVMValueRef 464 get_tcs_out_patch0_patch_data_offset(struct nir_to_llvm_context *ctx) 465 { 466 return LLVMBuildMul(ctx->builder, 467 unpack_param(&ctx->ac, ctx->tcs_out_offsets, 16, 16), 468 LLVMConstInt(ctx->ac.i32, 4, false), ""); 469 } 470 471 static LLVMValueRef 472 get_tcs_in_current_patch_offset(struct nir_to_llvm_context *ctx) 473 { 474 LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx); 475 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); 476 477 return LLVMBuildMul(ctx->builder, patch_stride, rel_patch_id, ""); 478 } 479 480 static LLVMValueRef 481 get_tcs_out_current_patch_offset(struct nir_to_llvm_context *ctx) 482 { 483 LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx); 484 LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx); 485 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); 486 487 return LLVMBuildAdd(ctx->builder, patch0_offset, 488 LLVMBuildMul(ctx->builder, patch_stride, 489 rel_patch_id, ""), 490 ""); 491 } 492 493 static LLVMValueRef 494 get_tcs_out_current_patch_data_offset(struct nir_to_llvm_context *ctx) 495 { 496 LLVMValueRef patch0_patch_data_offset = 497 get_tcs_out_patch0_patch_data_offset(ctx); 498 LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx); 499 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); 500 501 return LLVMBuildAdd(ctx->builder, patch0_patch_data_offset, 502 LLVMBuildMul(ctx->builder, patch_stride, 503 rel_patch_id, ""), 504 ""); 505 } 506 507 static void 508 set_loc(struct ac_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs, 509 uint32_t indirect_offset) 510 { 511 ud_info->sgpr_idx = *sgpr_idx; 512 ud_info->num_sgprs = num_sgprs; 513 ud_info->indirect = indirect_offset > 0; 514 ud_info->indirect_offset = indirect_offset; 515 *sgpr_idx += num_sgprs; 516 } 517 518 static void 519 set_loc_shader(struct nir_to_llvm_context *ctx, int idx, uint8_t *sgpr_idx, 520 uint8_t num_sgprs) 521 { 522 struct ac_userdata_info *ud_info = 523 &ctx->shader_info->user_sgprs_locs.shader_data[idx]; 524 assert(ud_info); 525 526 set_loc(ud_info, sgpr_idx, num_sgprs, 0); 527 } 528 529 static void 530 set_loc_desc(struct nir_to_llvm_context *ctx, int idx, uint8_t *sgpr_idx, 531 uint32_t indirect_offset) 532 { 533 struct ac_userdata_info *ud_info = 534 &ctx->shader_info->user_sgprs_locs.descriptor_sets[idx]; 535 assert(ud_info); 536 537 set_loc(ud_info, sgpr_idx, 2, indirect_offset); 538 } 539 540 struct user_sgpr_info { 541 bool need_ring_offsets; 542 uint8_t sgpr_count; 543 bool indirect_all_descriptor_sets; 544 }; 545 546 static bool needs_view_index_sgpr(struct nir_to_llvm_context *ctx, 547 gl_shader_stage stage) 548 { 549 switch (stage) { 550 case MESA_SHADER_VERTEX: 551 if (ctx->shader_info->info.needs_multiview_view_index || 552 (!ctx->options->key.vs.as_es && !ctx->options->key.vs.as_ls && ctx->options->key.has_multiview_view_index)) 553 return true; 554 break; 555 case MESA_SHADER_TESS_EVAL: 556 if (ctx->shader_info->info.needs_multiview_view_index || (!ctx->options->key.tes.as_es && ctx->options->key.has_multiview_view_index)) 557 return true; 558 case MESA_SHADER_GEOMETRY: 559 case MESA_SHADER_TESS_CTRL: 560 if (ctx->shader_info->info.needs_multiview_view_index) 561 return true; 562 default: 563 break; 564 } 565 return false; 566 } 567 568 static void allocate_user_sgprs(struct nir_to_llvm_context *ctx, 569 gl_shader_stage stage, 570 bool needs_view_index, 571 struct user_sgpr_info *user_sgpr_info) 572 { 573 memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info)); 574 575 /* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */ 576 if (stage == MESA_SHADER_GEOMETRY || 577 stage == MESA_SHADER_VERTEX || 578 stage == MESA_SHADER_TESS_CTRL || 579 stage == MESA_SHADER_TESS_EVAL || 580 ctx->is_gs_copy_shader) 581 user_sgpr_info->need_ring_offsets = true; 582 583 if (stage == MESA_SHADER_FRAGMENT && 584 ctx->shader_info->info.ps.needs_sample_positions) 585 user_sgpr_info->need_ring_offsets = true; 586 587 /* 2 user sgprs will nearly always be allocated for scratch/rings */ 588 if (ctx->options->supports_spill || user_sgpr_info->need_ring_offsets) { 589 user_sgpr_info->sgpr_count += 2; 590 } 591 592 /* FIXME: fix the number of user sgprs for merged shaders on GFX9 */ 593 switch (stage) { 594 case MESA_SHADER_COMPUTE: 595 if (ctx->shader_info->info.cs.uses_grid_size) 596 user_sgpr_info->sgpr_count += 3; 597 break; 598 case MESA_SHADER_FRAGMENT: 599 user_sgpr_info->sgpr_count += ctx->shader_info->info.ps.needs_sample_positions; 600 break; 601 case MESA_SHADER_VERTEX: 602 if (!ctx->is_gs_copy_shader) { 603 user_sgpr_info->sgpr_count += ctx->shader_info->info.vs.has_vertex_buffers ? 2 : 0; 604 if (ctx->shader_info->info.vs.needs_draw_id) { 605 user_sgpr_info->sgpr_count += 3; 606 } else { 607 user_sgpr_info->sgpr_count += 2; 608 } 609 } 610 if (ctx->options->key.vs.as_ls) 611 user_sgpr_info->sgpr_count++; 612 break; 613 case MESA_SHADER_TESS_CTRL: 614 user_sgpr_info->sgpr_count += 4; 615 break; 616 case MESA_SHADER_TESS_EVAL: 617 user_sgpr_info->sgpr_count += 1; 618 break; 619 case MESA_SHADER_GEOMETRY: 620 user_sgpr_info->sgpr_count += 2; 621 break; 622 default: 623 break; 624 } 625 626 if (needs_view_index) 627 user_sgpr_info->sgpr_count++; 628 629 if (ctx->shader_info->info.loads_push_constants) 630 user_sgpr_info->sgpr_count += 2; 631 632 uint32_t available_sgprs = ctx->options->chip_class >= GFX9 ? 32 : 16; 633 uint32_t remaining_sgprs = available_sgprs - user_sgpr_info->sgpr_count; 634 635 if (remaining_sgprs / 2 < util_bitcount(ctx->shader_info->info.desc_set_used_mask)) { 636 user_sgpr_info->sgpr_count += 2; 637 user_sgpr_info->indirect_all_descriptor_sets = true; 638 } else { 639 user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2; 640 } 641 } 642 643 static void 644 declare_global_input_sgprs(struct nir_to_llvm_context *ctx, 645 gl_shader_stage stage, 646 bool has_previous_stage, 647 gl_shader_stage previous_stage, 648 const struct user_sgpr_info *user_sgpr_info, 649 struct arg_info *args, 650 LLVMValueRef *desc_sets) 651 { 652 LLVMTypeRef type = const_array(ctx->ac.i8, 1024 * 1024); 653 unsigned num_sets = ctx->options->layout ? 654 ctx->options->layout->num_sets : 0; 655 unsigned stage_mask = 1 << stage; 656 657 if (has_previous_stage) 658 stage_mask |= 1 << previous_stage; 659 660 /* 1 for each descriptor set */ 661 if (!user_sgpr_info->indirect_all_descriptor_sets) { 662 for (unsigned i = 0; i < num_sets; ++i) { 663 if (ctx->options->layout->set[i].layout->shader_stages & stage_mask) { 664 add_array_arg(args, type, 665 &ctx->descriptor_sets[i]); 666 } 667 } 668 } else { 669 add_array_arg(args, const_array(type, 32), desc_sets); 670 } 671 672 if (ctx->shader_info->info.loads_push_constants) { 673 /* 1 for push constants and dynamic descriptors */ 674 add_array_arg(args, type, &ctx->push_constants); 675 } 676 } 677 678 static void 679 declare_vs_specific_input_sgprs(struct nir_to_llvm_context *ctx, 680 gl_shader_stage stage, 681 bool has_previous_stage, 682 gl_shader_stage previous_stage, 683 struct arg_info *args) 684 { 685 if (!ctx->is_gs_copy_shader && 686 (stage == MESA_SHADER_VERTEX || 687 (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { 688 if (ctx->shader_info->info.vs.has_vertex_buffers) { 689 add_arg(args, ARG_SGPR, const_array(ctx->ac.v4i32, 16), 690 &ctx->vertex_buffers); 691 } 692 add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.base_vertex); 693 add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.start_instance); 694 if (ctx->shader_info->info.vs.needs_draw_id) { 695 add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.draw_id); 696 } 697 } 698 } 699 700 static void 701 declare_vs_input_vgprs(struct nir_to_llvm_context *ctx, struct arg_info *args) 702 { 703 add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.vertex_id); 704 if (!ctx->is_gs_copy_shader) { 705 if (ctx->options->key.vs.as_ls) { 706 add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->rel_auto_id); 707 add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id); 708 } else { 709 add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id); 710 add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->vs_prim_id); 711 } 712 add_arg(args, ARG_VGPR, ctx->ac.i32, NULL); /* unused */ 713 } 714 } 715 716 static void 717 declare_tes_input_vgprs(struct nir_to_llvm_context *ctx, struct arg_info *args) 718 { 719 add_arg(args, ARG_VGPR, ctx->ac.f32, &ctx->tes_u); 720 add_arg(args, ARG_VGPR, ctx->ac.f32, &ctx->tes_v); 721 add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->tes_rel_patch_id); 722 add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.tes_patch_id); 723 } 724 725 static void 726 set_global_input_locs(struct nir_to_llvm_context *ctx, gl_shader_stage stage, 727 bool has_previous_stage, gl_shader_stage previous_stage, 728 const struct user_sgpr_info *user_sgpr_info, 729 LLVMValueRef desc_sets, uint8_t *user_sgpr_idx) 730 { 731 unsigned num_sets = ctx->options->layout ? 732 ctx->options->layout->num_sets : 0; 733 unsigned stage_mask = 1 << stage; 734 735 if (has_previous_stage) 736 stage_mask |= 1 << previous_stage; 737 738 if (!user_sgpr_info->indirect_all_descriptor_sets) { 739 for (unsigned i = 0; i < num_sets; ++i) { 740 if (ctx->options->layout->set[i].layout->shader_stages & stage_mask) { 741 set_loc_desc(ctx, i, user_sgpr_idx, 0); 742 } else 743 ctx->descriptor_sets[i] = NULL; 744 } 745 } else { 746 set_loc_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS, 747 user_sgpr_idx, 2); 748 749 for (unsigned i = 0; i < num_sets; ++i) { 750 if (ctx->options->layout->set[i].layout->shader_stages & stage_mask) { 751 set_loc_desc(ctx, i, user_sgpr_idx, i * 8); 752 ctx->descriptor_sets[i] = 753 ac_build_load_to_sgpr(&ctx->ac, 754 desc_sets, 755 LLVMConstInt(ctx->ac.i32, i, false)); 756 757 } else 758 ctx->descriptor_sets[i] = NULL; 759 } 760 ctx->shader_info->need_indirect_descriptor_sets = true; 761 } 762 763 if (ctx->shader_info->info.loads_push_constants) { 764 set_loc_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2); 765 } 766 } 767 768 static void 769 set_vs_specific_input_locs(struct nir_to_llvm_context *ctx, 770 gl_shader_stage stage, bool has_previous_stage, 771 gl_shader_stage previous_stage, 772 uint8_t *user_sgpr_idx) 773 { 774 if (!ctx->is_gs_copy_shader && 775 (stage == MESA_SHADER_VERTEX || 776 (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { 777 if (ctx->shader_info->info.vs.has_vertex_buffers) { 778 set_loc_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, 779 user_sgpr_idx, 2); 780 } 781 782 unsigned vs_num = 2; 783 if (ctx->shader_info->info.vs.needs_draw_id) 784 vs_num++; 785 786 set_loc_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, 787 user_sgpr_idx, vs_num); 788 } 789 } 790 791 static void create_function(struct nir_to_llvm_context *ctx, 792 gl_shader_stage stage, 793 bool has_previous_stage, 794 gl_shader_stage previous_stage) 795 { 796 uint8_t user_sgpr_idx; 797 struct user_sgpr_info user_sgpr_info; 798 struct arg_info args = {}; 799 LLVMValueRef desc_sets; 800 bool needs_view_index = needs_view_index_sgpr(ctx, stage); 801 allocate_user_sgprs(ctx, stage, needs_view_index, &user_sgpr_info); 802 803 if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) { 804 add_arg(&args, ARG_SGPR, const_array(ctx->ac.v4i32, 16), 805 &ctx->ring_offsets); 806 } 807 808 switch (stage) { 809 case MESA_SHADER_COMPUTE: 810 declare_global_input_sgprs(ctx, stage, has_previous_stage, 811 previous_stage, &user_sgpr_info, 812 &args, &desc_sets); 813 814 if (ctx->shader_info->info.cs.uses_grid_size) { 815 add_arg(&args, ARG_SGPR, ctx->ac.v3i32, 816 &ctx->num_work_groups); 817 } 818 819 for (int i = 0; i < 3; i++) { 820 ctx->workgroup_ids[i] = NULL; 821 if (ctx->shader_info->info.cs.uses_block_id[i]) { 822 add_arg(&args, ARG_SGPR, ctx->ac.i32, 823 &ctx->workgroup_ids[i]); 824 } 825 } 826 827 if (ctx->shader_info->info.cs.uses_local_invocation_idx) 828 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->tg_size); 829 add_arg(&args, ARG_VGPR, ctx->ac.v3i32, 830 &ctx->local_invocation_ids); 831 break; 832 case MESA_SHADER_VERTEX: 833 declare_global_input_sgprs(ctx, stage, has_previous_stage, 834 previous_stage, &user_sgpr_info, 835 &args, &desc_sets); 836 declare_vs_specific_input_sgprs(ctx, stage, has_previous_stage, 837 previous_stage, &args); 838 839 if (needs_view_index) 840 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->view_index); 841 if (ctx->options->key.vs.as_es) 842 add_arg(&args, ARG_SGPR, ctx->ac.i32, 843 &ctx->es2gs_offset); 844 else if (ctx->options->key.vs.as_ls) 845 add_arg(&args, ARG_SGPR, ctx->ac.i32, 846 &ctx->ls_out_layout); 847 848 declare_vs_input_vgprs(ctx, &args); 849 break; 850 case MESA_SHADER_TESS_CTRL: 851 if (has_previous_stage) { 852 // First 6 system regs 853 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); 854 add_arg(&args, ARG_SGPR, ctx->ac.i32, 855 &ctx->merged_wave_info); 856 add_arg(&args, ARG_SGPR, ctx->ac.i32, 857 &ctx->tess_factor_offset); 858 859 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // scratch offset 860 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown 861 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown 862 863 declare_global_input_sgprs(ctx, stage, 864 has_previous_stage, 865 previous_stage, 866 &user_sgpr_info, &args, 867 &desc_sets); 868 declare_vs_specific_input_sgprs(ctx, stage, 869 has_previous_stage, 870 previous_stage, &args); 871 872 add_arg(&args, ARG_SGPR, ctx->ac.i32, 873 &ctx->ls_out_layout); 874 875 add_arg(&args, ARG_SGPR, ctx->ac.i32, 876 &ctx->tcs_offchip_layout); 877 add_arg(&args, ARG_SGPR, ctx->ac.i32, 878 &ctx->tcs_out_offsets); 879 add_arg(&args, ARG_SGPR, ctx->ac.i32, 880 &ctx->tcs_out_layout); 881 add_arg(&args, ARG_SGPR, ctx->ac.i32, 882 &ctx->tcs_in_layout); 883 if (needs_view_index) 884 add_arg(&args, ARG_SGPR, ctx->ac.i32, 885 &ctx->view_index); 886 887 add_arg(&args, ARG_VGPR, ctx->ac.i32, 888 &ctx->abi.tcs_patch_id); 889 add_arg(&args, ARG_VGPR, ctx->ac.i32, 890 &ctx->abi.tcs_rel_ids); 891 892 declare_vs_input_vgprs(ctx, &args); 893 } else { 894 declare_global_input_sgprs(ctx, stage, 895 has_previous_stage, 896 previous_stage, 897 &user_sgpr_info, &args, 898 &desc_sets); 899 900 add_arg(&args, ARG_SGPR, ctx->ac.i32, 901 &ctx->tcs_offchip_layout); 902 add_arg(&args, ARG_SGPR, ctx->ac.i32, 903 &ctx->tcs_out_offsets); 904 add_arg(&args, ARG_SGPR, ctx->ac.i32, 905 &ctx->tcs_out_layout); 906 add_arg(&args, ARG_SGPR, ctx->ac.i32, 907 &ctx->tcs_in_layout); 908 if (needs_view_index) 909 add_arg(&args, ARG_SGPR, ctx->ac.i32, 910 &ctx->view_index); 911 912 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); 913 add_arg(&args, ARG_SGPR, ctx->ac.i32, 914 &ctx->tess_factor_offset); 915 add_arg(&args, ARG_VGPR, ctx->ac.i32, 916 &ctx->abi.tcs_patch_id); 917 add_arg(&args, ARG_VGPR, ctx->ac.i32, 918 &ctx->abi.tcs_rel_ids); 919 } 920 break; 921 case MESA_SHADER_TESS_EVAL: 922 declare_global_input_sgprs(ctx, stage, has_previous_stage, 923 previous_stage, &user_sgpr_info, 924 &args, &desc_sets); 925 926 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->tcs_offchip_layout); 927 if (needs_view_index) 928 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->view_index); 929 930 if (ctx->options->key.tes.as_es) { 931 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); 932 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); 933 add_arg(&args, ARG_SGPR, ctx->ac.i32, 934 &ctx->es2gs_offset); 935 } else { 936 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); 937 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); 938 } 939 declare_tes_input_vgprs(ctx, &args); 940 break; 941 case MESA_SHADER_GEOMETRY: 942 if (has_previous_stage) { 943 // First 6 system regs 944 add_arg(&args, ARG_SGPR, ctx->ac.i32, 945 &ctx->gs2vs_offset); 946 add_arg(&args, ARG_SGPR, ctx->ac.i32, 947 &ctx->merged_wave_info); 948 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); 949 950 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // scratch offset 951 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown 952 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown 953 954 declare_global_input_sgprs(ctx, stage, 955 has_previous_stage, 956 previous_stage, 957 &user_sgpr_info, &args, 958 &desc_sets); 959 960 if (previous_stage == MESA_SHADER_TESS_EVAL) { 961 add_arg(&args, ARG_SGPR, ctx->ac.i32, 962 &ctx->tcs_offchip_layout); 963 } else { 964 declare_vs_specific_input_sgprs(ctx, stage, 965 has_previous_stage, 966 previous_stage, 967 &args); 968 } 969 970 add_arg(&args, ARG_SGPR, ctx->ac.i32, 971 &ctx->gsvs_ring_stride); 972 add_arg(&args, ARG_SGPR, ctx->ac.i32, 973 &ctx->gsvs_num_entries); 974 if (needs_view_index) 975 add_arg(&args, ARG_SGPR, ctx->ac.i32, 976 &ctx->view_index); 977 978 add_arg(&args, ARG_VGPR, ctx->ac.i32, 979 &ctx->gs_vtx_offset[0]); 980 add_arg(&args, ARG_VGPR, ctx->ac.i32, 981 &ctx->gs_vtx_offset[2]); 982 add_arg(&args, ARG_VGPR, ctx->ac.i32, 983 &ctx->abi.gs_prim_id); 984 add_arg(&args, ARG_VGPR, ctx->ac.i32, 985 &ctx->abi.gs_invocation_id); 986 add_arg(&args, ARG_VGPR, ctx->ac.i32, 987 &ctx->gs_vtx_offset[4]); 988 989 if (previous_stage == MESA_SHADER_VERTEX) { 990 declare_vs_input_vgprs(ctx, &args); 991 } else { 992 declare_tes_input_vgprs(ctx, &args); 993 } 994 } else { 995 declare_global_input_sgprs(ctx, stage, 996 has_previous_stage, 997 previous_stage, 998 &user_sgpr_info, &args, 999 &desc_sets); 1000 1001 add_arg(&args, ARG_SGPR, ctx->ac.i32, 1002 &ctx->gsvs_ring_stride); 1003 add_arg(&args, ARG_SGPR, ctx->ac.i32, 1004 &ctx->gsvs_num_entries); 1005 if (needs_view_index) 1006 add_arg(&args, ARG_SGPR, ctx->ac.i32, 1007 &ctx->view_index); 1008 1009 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->gs2vs_offset); 1010 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->gs_wave_id); 1011 add_arg(&args, ARG_VGPR, ctx->ac.i32, 1012 &ctx->gs_vtx_offset[0]); 1013 add_arg(&args, ARG_VGPR, ctx->ac.i32, 1014 &ctx->gs_vtx_offset[1]); 1015 add_arg(&args, ARG_VGPR, ctx->ac.i32, 1016 &ctx->abi.gs_prim_id); 1017 add_arg(&args, ARG_VGPR, ctx->ac.i32, 1018 &ctx->gs_vtx_offset[2]); 1019 add_arg(&args, ARG_VGPR, ctx->ac.i32, 1020 &ctx->gs_vtx_offset[3]); 1021 add_arg(&args, ARG_VGPR, ctx->ac.i32, 1022 &ctx->gs_vtx_offset[4]); 1023 add_arg(&args, ARG_VGPR, ctx->ac.i32, 1024 &ctx->gs_vtx_offset[5]); 1025 add_arg(&args, ARG_VGPR, ctx->ac.i32, 1026 &ctx->abi.gs_invocation_id); 1027 } 1028 break; 1029 case MESA_SHADER_FRAGMENT: 1030 declare_global_input_sgprs(ctx, stage, has_previous_stage, 1031 previous_stage, &user_sgpr_info, 1032 &args, &desc_sets); 1033 1034 if (ctx->shader_info->info.ps.needs_sample_positions) 1035 add_arg(&args, ARG_SGPR, ctx->ac.i32, 1036 &ctx->sample_pos_offset); 1037 1038 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->prim_mask); 1039 add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_sample); 1040 add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_center); 1041 add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_centroid); 1042 add_arg(&args, ARG_VGPR, ctx->ac.v3i32, NULL); /* persp pull model */ 1043 add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_sample); 1044 add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_center); 1045 add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_centroid); 1046 add_arg(&args, ARG_VGPR, ctx->ac.f32, NULL); /* line stipple tex */ 1047 add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[0]); 1048 add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[1]); 1049 add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[2]); 1050 add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[3]); 1051 add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.front_face); 1052 add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.ancillary); 1053 add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.sample_coverage); 1054 add_arg(&args, ARG_VGPR, ctx->ac.i32, NULL); /* fixed pt */ 1055 break; 1056 default: 1057 unreachable("Shader stage not implemented"); 1058 } 1059 1060 ctx->main_function = create_llvm_function( 1061 ctx->context, ctx->module, ctx->builder, NULL, 0, &args, 1062 ctx->max_workgroup_size, 1063 ctx->options->unsafe_math); 1064 set_llvm_calling_convention(ctx->main_function, stage); 1065 1066 1067 ctx->shader_info->num_input_vgprs = 0; 1068 ctx->shader_info->num_input_sgprs = ctx->options->supports_spill ? 2 : 0; 1069 1070 ctx->shader_info->num_input_sgprs += args.num_sgprs_used; 1071 1072 if (ctx->stage != MESA_SHADER_FRAGMENT) 1073 ctx->shader_info->num_input_vgprs = args.num_vgprs_used; 1074 1075 assign_arguments(ctx->main_function, &args); 1076 1077 user_sgpr_idx = 0; 1078 1079 if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets) { 1080 set_loc_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS, 1081 &user_sgpr_idx, 2); 1082 if (ctx->options->supports_spill) { 1083 ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr", 1084 LLVMPointerType(ctx->ac.i8, CONST_ADDR_SPACE), 1085 NULL, 0, AC_FUNC_ATTR_READNONE); 1086 ctx->ring_offsets = LLVMBuildBitCast(ctx->builder, ctx->ring_offsets, 1087 const_array(ctx->ac.v4i32, 16), ""); 1088 } 1089 } 1090 1091 /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including 1092 * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */ 1093 if (has_previous_stage) 1094 user_sgpr_idx = 0; 1095 1096 set_global_input_locs(ctx, stage, has_previous_stage, previous_stage, 1097 &user_sgpr_info, desc_sets, &user_sgpr_idx); 1098 1099 switch (stage) { 1100 case MESA_SHADER_COMPUTE: 1101 if (ctx->shader_info->info.cs.uses_grid_size) { 1102 set_loc_shader(ctx, AC_UD_CS_GRID_SIZE, 1103 &user_sgpr_idx, 3); 1104 } 1105 break; 1106 case MESA_SHADER_VERTEX: 1107 set_vs_specific_input_locs(ctx, stage, has_previous_stage, 1108 previous_stage, &user_sgpr_idx); 1109 if (ctx->view_index) 1110 set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); 1111 if (ctx->options->key.vs.as_ls) { 1112 set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, 1113 &user_sgpr_idx, 1); 1114 } 1115 if (ctx->options->key.vs.as_ls) 1116 ac_declare_lds_as_pointer(&ctx->ac); 1117 break; 1118 case MESA_SHADER_TESS_CTRL: 1119 set_vs_specific_input_locs(ctx, stage, has_previous_stage, 1120 previous_stage, &user_sgpr_idx); 1121 if (has_previous_stage) 1122 set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, 1123 &user_sgpr_idx, 1); 1124 set_loc_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, &user_sgpr_idx, 4); 1125 if (ctx->view_index) 1126 set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); 1127 ac_declare_lds_as_pointer(&ctx->ac); 1128 break; 1129 case MESA_SHADER_TESS_EVAL: 1130 set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1); 1131 if (ctx->view_index) 1132 set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); 1133 break; 1134 case MESA_SHADER_GEOMETRY: 1135 if (has_previous_stage) { 1136 if (previous_stage == MESA_SHADER_VERTEX) 1137 set_vs_specific_input_locs(ctx, stage, 1138 has_previous_stage, 1139 previous_stage, 1140 &user_sgpr_idx); 1141 else 1142 set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, 1143 &user_sgpr_idx, 1); 1144 } 1145 set_loc_shader(ctx, AC_UD_GS_VS_RING_STRIDE_ENTRIES, 1146 &user_sgpr_idx, 2); 1147 if (ctx->view_index) 1148 set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); 1149 if (has_previous_stage) 1150 ac_declare_lds_as_pointer(&ctx->ac); 1151 break; 1152 case MESA_SHADER_FRAGMENT: 1153 if (ctx->shader_info->info.ps.needs_sample_positions) { 1154 set_loc_shader(ctx, AC_UD_PS_SAMPLE_POS_OFFSET, 1155 &user_sgpr_idx, 1); 1156 } 1157 break; 1158 default: 1159 unreachable("Shader stage not implemented"); 1160 } 1161 1162 ctx->shader_info->num_user_sgprs = user_sgpr_idx; 1163 } 1164 1165 static LLVMValueRef trim_vector(struct ac_llvm_context *ctx, 1166 LLVMValueRef value, unsigned count) 1167 { 1168 unsigned num_components = ac_get_llvm_num_components(value); 1169 if (count == num_components) 1170 return value; 1171 1172 LLVMValueRef masks[] = { 1173 LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false), 1174 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)}; 1175 1176 if (count == 1) 1177 return LLVMBuildExtractElement(ctx->builder, value, masks[0], 1178 ""); 1179 1180 LLVMValueRef swizzle = LLVMConstVector(masks, count); 1181 return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, ""); 1182 } 1183 1184 static void 1185 build_store_values_extended(struct ac_llvm_context *ac, 1186 LLVMValueRef *values, 1187 unsigned value_count, 1188 unsigned value_stride, 1189 LLVMValueRef vec) 1190 { 1191 LLVMBuilderRef builder = ac->builder; 1192 unsigned i; 1193 1194 for (i = 0; i < value_count; i++) { 1195 LLVMValueRef ptr = values[i * value_stride]; 1196 LLVMValueRef index = LLVMConstInt(ac->i32, i, false); 1197 LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, ""); 1198 LLVMBuildStore(builder, value, ptr); 1199 } 1200 } 1201 1202 static LLVMTypeRef get_def_type(struct ac_nir_context *ctx, 1203 const nir_ssa_def *def) 1204 { 1205 LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size); 1206 if (def->num_components > 1) { 1207 type = LLVMVectorType(type, def->num_components); 1208 } 1209 return type; 1210 } 1211 1212 static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src) 1213 { 1214 assert(src.is_ssa); 1215 struct hash_entry *entry = _mesa_hash_table_search(nir->defs, src.ssa); 1216 return (LLVMValueRef)entry->data; 1217 } 1218 1219 1220 static LLVMBasicBlockRef get_block(struct ac_nir_context *nir, 1221 const struct nir_block *b) 1222 { 1223 struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b); 1224 return (LLVMBasicBlockRef)entry->data; 1225 } 1226 1227 static LLVMValueRef get_alu_src(struct ac_nir_context *ctx, 1228 nir_alu_src src, 1229 unsigned num_components) 1230 { 1231 LLVMValueRef value = get_src(ctx, src.src); 1232 bool need_swizzle = false; 1233 1234 assert(value); 1235 LLVMTypeRef type = LLVMTypeOf(value); 1236 unsigned src_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind 1237 ? LLVMGetVectorSize(type) 1238 : 1; 1239 1240 for (unsigned i = 0; i < num_components; ++i) { 1241 assert(src.swizzle[i] < src_components); 1242 if (src.swizzle[i] != i) 1243 need_swizzle = true; 1244 } 1245 1246 if (need_swizzle || num_components != src_components) { 1247 LLVMValueRef masks[] = { 1248 LLVMConstInt(ctx->ac.i32, src.swizzle[0], false), 1249 LLVMConstInt(ctx->ac.i32, src.swizzle[1], false), 1250 LLVMConstInt(ctx->ac.i32, src.swizzle[2], false), 1251 LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)}; 1252 1253 if (src_components > 1 && num_components == 1) { 1254 value = LLVMBuildExtractElement(ctx->ac.builder, value, 1255 masks[0], ""); 1256 } else if (src_components == 1 && num_components > 1) { 1257 LLVMValueRef values[] = {value, value, value, value}; 1258 value = ac_build_gather_values(&ctx->ac, values, num_components); 1259 } else { 1260 LLVMValueRef swizzle = LLVMConstVector(masks, num_components); 1261 value = LLVMBuildShuffleVector(ctx->ac.builder, value, value, 1262 swizzle, ""); 1263 } 1264 } 1265 assert(!src.negate); 1266 assert(!src.abs); 1267 return value; 1268 } 1269 1270 static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx, 1271 LLVMIntPredicate pred, LLVMValueRef src0, 1272 LLVMValueRef src1) 1273 { 1274 LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, ""); 1275 return LLVMBuildSelect(ctx->builder, result, 1276 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false), 1277 ctx->i32_0, ""); 1278 } 1279 1280 static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx, 1281 LLVMRealPredicate pred, LLVMValueRef src0, 1282 LLVMValueRef src1) 1283 { 1284 LLVMValueRef result; 1285 src0 = ac_to_float(ctx, src0); 1286 src1 = ac_to_float(ctx, src1); 1287 result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, ""); 1288 return LLVMBuildSelect(ctx->builder, result, 1289 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false), 1290 ctx->i32_0, ""); 1291 } 1292 1293 static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx, 1294 const char *intrin, 1295 LLVMTypeRef result_type, 1296 LLVMValueRef src0) 1297 { 1298 char name[64]; 1299 LLVMValueRef params[] = { 1300 ac_to_float(ctx, src0), 1301 }; 1302 1303 MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin, 1304 get_elem_bits(ctx, result_type)); 1305 assert(length < sizeof(name)); 1306 return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE); 1307 } 1308 1309 static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx, 1310 const char *intrin, 1311 LLVMTypeRef result_type, 1312 LLVMValueRef src0, LLVMValueRef src1) 1313 { 1314 char name[64]; 1315 LLVMValueRef params[] = { 1316 ac_to_float(ctx, src0), 1317 ac_to_float(ctx, src1), 1318 }; 1319 1320 MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin, 1321 get_elem_bits(ctx, result_type)); 1322 assert(length < sizeof(name)); 1323 return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE); 1324 } 1325 1326 static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx, 1327 const char *intrin, 1328 LLVMTypeRef result_type, 1329 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2) 1330 { 1331 char name[64]; 1332 LLVMValueRef params[] = { 1333 ac_to_float(ctx, src0), 1334 ac_to_float(ctx, src1), 1335 ac_to_float(ctx, src2), 1336 }; 1337 1338 MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin, 1339 get_elem_bits(ctx, result_type)); 1340 assert(length < sizeof(name)); 1341 return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE); 1342 } 1343 1344 static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx, 1345 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2) 1346 { 1347 LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, 1348 ctx->i32_0, ""); 1349 return LLVMBuildSelect(ctx->builder, v, ac_to_integer(ctx, src1), 1350 ac_to_integer(ctx, src2), ""); 1351 } 1352 1353 static LLVMValueRef emit_minmax_int(struct ac_llvm_context *ctx, 1354 LLVMIntPredicate pred, 1355 LLVMValueRef src0, LLVMValueRef src1) 1356 { 1357 return LLVMBuildSelect(ctx->builder, 1358 LLVMBuildICmp(ctx->builder, pred, src0, src1, ""), 1359 src0, 1360 src1, ""); 1361 1362 } 1363 static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx, 1364 LLVMValueRef src0) 1365 { 1366 return emit_minmax_int(ctx, LLVMIntSGT, src0, 1367 LLVMBuildNeg(ctx->builder, src0, "")); 1368 } 1369 1370 static LLVMValueRef emit_fsign(struct ac_llvm_context *ctx, 1371 LLVMValueRef src0, 1372 unsigned bitsize) 1373 { 1374 LLVMValueRef cmp, val, zero, one; 1375 LLVMTypeRef type; 1376 1377 if (bitsize == 32) { 1378 type = ctx->f32; 1379 zero = ctx->f32_0; 1380 one = ctx->f32_1; 1381 } else { 1382 type = ctx->f64; 1383 zero = ctx->f64_0; 1384 one = ctx->f64_1; 1385 } 1386 1387 cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, zero, ""); 1388 val = LLVMBuildSelect(ctx->builder, cmp, one, src0, ""); 1389 cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, zero, ""); 1390 val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(type, -1.0), ""); 1391 return val; 1392 } 1393 1394 static LLVMValueRef emit_isign(struct ac_llvm_context *ctx, 1395 LLVMValueRef src0, unsigned bitsize) 1396 { 1397 LLVMValueRef cmp, val, zero, one; 1398 LLVMTypeRef type; 1399 1400 if (bitsize == 32) { 1401 type = ctx->i32; 1402 zero = ctx->i32_0; 1403 one = ctx->i32_1; 1404 } else { 1405 type = ctx->i64; 1406 zero = ctx->i64_0; 1407 one = ctx->i64_1; 1408 } 1409 1410 cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, zero, ""); 1411 val = LLVMBuildSelect(ctx->builder, cmp, one, src0, ""); 1412 cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, zero, ""); 1413 val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(type, -1, true), ""); 1414 return val; 1415 } 1416 1417 static LLVMValueRef emit_ffract(struct ac_llvm_context *ctx, 1418 LLVMValueRef src0) 1419 { 1420 const char *intr = "llvm.floor.f32"; 1421 LLVMValueRef fsrc0 = ac_to_float(ctx, src0); 1422 LLVMValueRef params[] = { 1423 fsrc0, 1424 }; 1425 LLVMValueRef floor = ac_build_intrinsic(ctx, intr, 1426 ctx->f32, params, 1, 1427 AC_FUNC_ATTR_READNONE); 1428 return LLVMBuildFSub(ctx->builder, fsrc0, floor, ""); 1429 } 1430 1431 static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx, 1432 const char *intrin, 1433 LLVMValueRef src0, LLVMValueRef src1) 1434 { 1435 LLVMTypeRef ret_type; 1436 LLVMTypeRef types[] = { ctx->i32, ctx->i1 }; 1437 LLVMValueRef res; 1438 LLVMValueRef params[] = { src0, src1 }; 1439 ret_type = LLVMStructTypeInContext(ctx->context, types, 1440 2, true); 1441 1442 res = ac_build_intrinsic(ctx, intrin, ret_type, 1443 params, 2, AC_FUNC_ATTR_READNONE); 1444 1445 res = LLVMBuildExtractValue(ctx->builder, res, 1, ""); 1446 res = LLVMBuildZExt(ctx->builder, res, ctx->i32, ""); 1447 return res; 1448 } 1449 1450 static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx, 1451 LLVMValueRef src0) 1452 { 1453 return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), ""); 1454 } 1455 1456 static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx, 1457 LLVMValueRef src0) 1458 { 1459 src0 = ac_to_float(ctx, src0); 1460 return LLVMBuildSExt(ctx->builder, 1461 LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, ctx->f32_0, ""), 1462 ctx->i32, ""); 1463 } 1464 1465 static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx, 1466 LLVMValueRef src0, 1467 unsigned bitsize) 1468 { 1469 LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, ""); 1470 1471 if (bitsize == 32) 1472 return result; 1473 1474 return LLVMBuildZExt(ctx->builder, result, ctx->i64, ""); 1475 } 1476 1477 static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx, 1478 LLVMValueRef src0) 1479 { 1480 return LLVMBuildSExt(ctx->builder, 1481 LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, ctx->i32_0, ""), 1482 ctx->i32, ""); 1483 } 1484 1485 static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx, 1486 LLVMValueRef src0) 1487 { 1488 LLVMValueRef result; 1489 LLVMValueRef cond = NULL; 1490 1491 src0 = ac_to_float(&ctx->ac, src0); 1492 result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->ac.f16, ""); 1493 1494 if (ctx->options->chip_class >= VI) { 1495 LLVMValueRef args[2]; 1496 /* Check if the result is a denormal - and flush to 0 if so. */ 1497 args[0] = result; 1498 args[1] = LLVMConstInt(ctx->ac.i32, N_SUBNORMAL | P_SUBNORMAL, false); 1499 cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f16", ctx->ac.i1, args, 2, AC_FUNC_ATTR_READNONE); 1500 } 1501 1502 /* need to convert back up to f32 */ 1503 result = LLVMBuildFPExt(ctx->builder, result, ctx->ac.f32, ""); 1504 1505 if (ctx->options->chip_class >= VI) 1506 result = LLVMBuildSelect(ctx->builder, cond, ctx->ac.f32_0, result, ""); 1507 else { 1508 /* for SI/CIK */ 1509 /* 0x38800000 is smallest half float value (2^-14) in 32-bit float, 1510 * so compare the result and flush to 0 if it's smaller. 1511 */ 1512 LLVMValueRef temp, cond2; 1513 temp = emit_intrin_1f_param(&ctx->ac, "llvm.fabs", 1514 ctx->ac.f32, result); 1515 cond = LLVMBuildFCmp(ctx->builder, LLVMRealUGT, 1516 LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->ac.i32, 0x38800000, false), ctx->ac.f32, ""), 1517 temp, ""); 1518 cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealUNE, 1519 temp, ctx->ac.f32_0, ""); 1520 cond = LLVMBuildAnd(ctx->builder, cond, cond2, ""); 1521 result = LLVMBuildSelect(ctx->builder, cond, ctx->ac.f32_0, result, ""); 1522 } 1523 return result; 1524 } 1525 1526 static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx, 1527 LLVMValueRef src0, LLVMValueRef src1) 1528 { 1529 LLVMValueRef dst64, result; 1530 src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, ""); 1531 src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, ""); 1532 1533 dst64 = LLVMBuildMul(ctx->builder, src0, src1, ""); 1534 dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), ""); 1535 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, ""); 1536 return result; 1537 } 1538 1539 static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx, 1540 LLVMValueRef src0, LLVMValueRef src1) 1541 { 1542 LLVMValueRef dst64, result; 1543 src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, ""); 1544 src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, ""); 1545 1546 dst64 = LLVMBuildMul(ctx->builder, src0, src1, ""); 1547 dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), ""); 1548 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, ""); 1549 return result; 1550 } 1551 1552 static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx, 1553 bool is_signed, 1554 const LLVMValueRef srcs[3]) 1555 { 1556 LLVMValueRef result; 1557 LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), ""); 1558 1559 result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed); 1560 result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, ""); 1561 return result; 1562 } 1563 1564 static LLVMValueRef emit_bitfield_insert(struct ac_llvm_context *ctx, 1565 LLVMValueRef src0, LLVMValueRef src1, 1566 LLVMValueRef src2, LLVMValueRef src3) 1567 { 1568 LLVMValueRef bfi_args[3], result; 1569 1570 bfi_args[0] = LLVMBuildShl(ctx->builder, 1571 LLVMBuildSub(ctx->builder, 1572 LLVMBuildShl(ctx->builder, 1573 ctx->i32_1, 1574 src3, ""), 1575 ctx->i32_1, ""), 1576 src2, ""); 1577 bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, ""); 1578 bfi_args[2] = src0; 1579 1580 LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), ""); 1581 1582 /* Calculate: 1583 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2) 1584 * Use the right-hand side, which the LLVM backend can convert to V_BFI. 1585 */ 1586 result = LLVMBuildXor(ctx->builder, bfi_args[2], 1587 LLVMBuildAnd(ctx->builder, bfi_args[0], 1588 LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), ""); 1589 1590 result = LLVMBuildSelect(ctx->builder, icond, src1, result, ""); 1591 return result; 1592 } 1593 1594 static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx, 1595 LLVMValueRef src0) 1596 { 1597 LLVMValueRef comp[2]; 1598 1599 src0 = ac_to_float(ctx, src0); 1600 comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, ""); 1601 comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, ""); 1602 1603 return ac_build_cvt_pkrtz_f16(ctx, comp); 1604 } 1605 1606 static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx, 1607 LLVMValueRef src0) 1608 { 1609 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false); 1610 LLVMValueRef temps[2], result, val; 1611 int i; 1612 1613 for (i = 0; i < 2; i++) { 1614 val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0; 1615 val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, ""); 1616 val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, ""); 1617 temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, ""); 1618 } 1619 1620 result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0], 1621 ctx->i32_0, ""); 1622 result = LLVMBuildInsertElement(ctx->builder, result, temps[1], 1623 ctx->i32_1, ""); 1624 return result; 1625 } 1626 1627 static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx, 1628 nir_op op, 1629 LLVMValueRef src0) 1630 { 1631 unsigned mask; 1632 int idx; 1633 LLVMValueRef result; 1634 1635 if (op == nir_op_fddx_fine || op == nir_op_fddx) 1636 mask = AC_TID_MASK_LEFT; 1637 else if (op == nir_op_fddy_fine || op == nir_op_fddy) 1638 mask = AC_TID_MASK_TOP; 1639 else 1640 mask = AC_TID_MASK_TOP_LEFT; 1641 1642 /* for DDX we want to next X pixel, DDY next Y pixel. */ 1643 if (op == nir_op_fddx_fine || 1644 op == nir_op_fddx_coarse || 1645 op == nir_op_fddx) 1646 idx = 1; 1647 else 1648 idx = 2; 1649 1650 result = ac_build_ddxy(&ctx->ac, mask, idx, src0); 1651 return result; 1652 } 1653 1654 /* 1655 * this takes an I,J coordinate pair, 1656 * and works out the X and Y derivatives. 1657 * it returns DDX(I), DDX(J), DDY(I), DDY(J). 1658 */ 1659 static LLVMValueRef emit_ddxy_interp( 1660 struct ac_nir_context *ctx, 1661 LLVMValueRef interp_ij) 1662 { 1663 LLVMValueRef result[4], a; 1664 unsigned i; 1665 1666 for (i = 0; i < 2; i++) { 1667 a = LLVMBuildExtractElement(ctx->ac.builder, interp_ij, 1668 LLVMConstInt(ctx->ac.i32, i, false), ""); 1669 result[i] = emit_ddxy(ctx, nir_op_fddx, a); 1670 result[2+i] = emit_ddxy(ctx, nir_op_fddy, a); 1671 } 1672 return ac_build_gather_values(&ctx->ac, result, 4); 1673 } 1674 1675 static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) 1676 { 1677 LLVMValueRef src[4], result = NULL; 1678 unsigned num_components = instr->dest.dest.ssa.num_components; 1679 unsigned src_components; 1680 LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa); 1681 1682 assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src)); 1683 switch (instr->op) { 1684 case nir_op_vec2: 1685 case nir_op_vec3: 1686 case nir_op_vec4: 1687 src_components = 1; 1688 break; 1689 case nir_op_pack_half_2x16: 1690 src_components = 2; 1691 break; 1692 case nir_op_unpack_half_2x16: 1693 src_components = 1; 1694 break; 1695 default: 1696 src_components = num_components; 1697 break; 1698 } 1699 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) 1700 src[i] = get_alu_src(ctx, instr->src[i], src_components); 1701 1702 switch (instr->op) { 1703 case nir_op_fmov: 1704 case nir_op_imov: 1705 result = src[0]; 1706 break; 1707 case nir_op_fneg: 1708 src[0] = ac_to_float(&ctx->ac, src[0]); 1709 result = LLVMBuildFNeg(ctx->ac.builder, src[0], ""); 1710 break; 1711 case nir_op_ineg: 1712 result = LLVMBuildNeg(ctx->ac.builder, src[0], ""); 1713 break; 1714 case nir_op_inot: 1715 result = LLVMBuildNot(ctx->ac.builder, src[0], ""); 1716 break; 1717 case nir_op_iadd: 1718 result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], ""); 1719 break; 1720 case nir_op_fadd: 1721 src[0] = ac_to_float(&ctx->ac, src[0]); 1722 src[1] = ac_to_float(&ctx->ac, src[1]); 1723 result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], ""); 1724 break; 1725 case nir_op_fsub: 1726 src[0] = ac_to_float(&ctx->ac, src[0]); 1727 src[1] = ac_to_float(&ctx->ac, src[1]); 1728 result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], ""); 1729 break; 1730 case nir_op_isub: 1731 result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], ""); 1732 break; 1733 case nir_op_imul: 1734 result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], ""); 1735 break; 1736 case nir_op_imod: 1737 result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], ""); 1738 break; 1739 case nir_op_umod: 1740 result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], ""); 1741 break; 1742 case nir_op_fmod: 1743 src[0] = ac_to_float(&ctx->ac, src[0]); 1744 src[1] = ac_to_float(&ctx->ac, src[1]); 1745 result = ac_build_fdiv(&ctx->ac, src[0], src[1]); 1746 result = emit_intrin_1f_param(&ctx->ac, "llvm.floor", 1747 ac_to_float_type(&ctx->ac, def_type), result); 1748 result = LLVMBuildFMul(ctx->ac.builder, src[1] , result, ""); 1749 result = LLVMBuildFSub(ctx->ac.builder, src[0], result, ""); 1750 break; 1751 case nir_op_frem: 1752 src[0] = ac_to_float(&ctx->ac, src[0]); 1753 src[1] = ac_to_float(&ctx->ac, src[1]); 1754 result = LLVMBuildFRem(ctx->ac.builder, src[0], src[1], ""); 1755 break; 1756 case nir_op_irem: 1757 result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], ""); 1758 break; 1759 case nir_op_idiv: 1760 result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], ""); 1761 break; 1762 case nir_op_udiv: 1763 result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], ""); 1764 break; 1765 case nir_op_fmul: 1766 src[0] = ac_to_float(&ctx->ac, src[0]); 1767 src[1] = ac_to_float(&ctx->ac, src[1]); 1768 result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], ""); 1769 break; 1770 case nir_op_fdiv: 1771 src[0] = ac_to_float(&ctx->ac, src[0]); 1772 src[1] = ac_to_float(&ctx->ac, src[1]); 1773 result = ac_build_fdiv(&ctx->ac, src[0], src[1]); 1774 break; 1775 case nir_op_frcp: 1776 src[0] = ac_to_float(&ctx->ac, src[0]); 1777 result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1, 1778 src[0]); 1779 break; 1780 case nir_op_iand: 1781 result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], ""); 1782 break; 1783 case nir_op_ior: 1784 result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], ""); 1785 break; 1786 case nir_op_ixor: 1787 result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], ""); 1788 break; 1789 case nir_op_ishl: 1790 result = LLVMBuildShl(ctx->ac.builder, src[0], 1791 LLVMBuildZExt(ctx->ac.builder, src[1], 1792 LLVMTypeOf(src[0]), ""), 1793 ""); 1794 break; 1795 case nir_op_ishr: 1796 result = LLVMBuildAShr(ctx->ac.builder, src[0], 1797 LLVMBuildZExt(ctx->ac.builder, src[1], 1798 LLVMTypeOf(src[0]), ""), 1799 ""); 1800 break; 1801 case nir_op_ushr: 1802 result = LLVMBuildLShr(ctx->ac.builder, src[0], 1803 LLVMBuildZExt(ctx->ac.builder, src[1], 1804 LLVMTypeOf(src[0]), ""), 1805 ""); 1806 break; 1807 case nir_op_ilt: 1808 result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]); 1809 break; 1810 case nir_op_ine: 1811 result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]); 1812 break; 1813 case nir_op_ieq: 1814 result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]); 1815 break; 1816 case nir_op_ige: 1817 result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]); 1818 break; 1819 case nir_op_ult: 1820 result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]); 1821 break; 1822 case nir_op_uge: 1823 result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]); 1824 break; 1825 case nir_op_feq: 1826 result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]); 1827 break; 1828 case nir_op_fne: 1829 result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]); 1830 break; 1831 case nir_op_flt: 1832 result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]); 1833 break; 1834 case nir_op_fge: 1835 result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]); 1836 break; 1837 case nir_op_fabs: 1838 result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs", 1839 ac_to_float_type(&ctx->ac, def_type), src[0]); 1840 break; 1841 case nir_op_iabs: 1842 result = emit_iabs(&ctx->ac, src[0]); 1843 break; 1844 case nir_op_imax: 1845 result = emit_minmax_int(&ctx->ac, LLVMIntSGT, src[0], src[1]); 1846 break; 1847 case nir_op_imin: 1848 result = emit_minmax_int(&ctx->ac, LLVMIntSLT, src[0], src[1]); 1849 break; 1850 case nir_op_umax: 1851 result = emit_minmax_int(&ctx->ac, LLVMIntUGT, src[0], src[1]); 1852 break; 1853 case nir_op_umin: 1854 result = emit_minmax_int(&ctx->ac, LLVMIntULT, src[0], src[1]); 1855 break; 1856 case nir_op_isign: 1857 result = emit_isign(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); 1858 break; 1859 case nir_op_fsign: 1860 src[0] = ac_to_float(&ctx->ac, src[0]); 1861 result = emit_fsign(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); 1862 break; 1863 case nir_op_ffloor: 1864 result = emit_intrin_1f_param(&ctx->ac, "llvm.floor", 1865 ac_to_float_type(&ctx->ac, def_type), src[0]); 1866 break; 1867 case nir_op_ftrunc: 1868 result = emit_intrin_1f_param(&ctx->ac, "llvm.trunc", 1869 ac_to_float_type(&ctx->ac, def_type), src[0]); 1870 break; 1871 case nir_op_fceil: 1872 result = emit_intrin_1f_param(&ctx->ac, "llvm.ceil", 1873 ac_to_float_type(&ctx->ac, def_type), src[0]); 1874 break; 1875 case nir_op_fround_even: 1876 result = emit_intrin_1f_param(&ctx->ac, "llvm.rint", 1877 ac_to_float_type(&ctx->ac, def_type),src[0]); 1878 break; 1879 case nir_op_ffract: 1880 result = emit_ffract(&ctx->ac, src[0]); 1881 break; 1882 case nir_op_fsin: 1883 result = emit_intrin_1f_param(&ctx->ac, "llvm.sin", 1884 ac_to_float_type(&ctx->ac, def_type), src[0]); 1885 break; 1886 case nir_op_fcos: 1887 result = emit_intrin_1f_param(&ctx->ac, "llvm.cos", 1888 ac_to_float_type(&ctx->ac, def_type), src[0]); 1889 break; 1890 case nir_op_fsqrt: 1891 result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", 1892 ac_to_float_type(&ctx->ac, def_type), src[0]); 1893 break; 1894 case nir_op_fexp2: 1895 result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2", 1896 ac_to_float_type(&ctx->ac, def_type), src[0]); 1897 break; 1898 case nir_op_flog2: 1899 result = emit_intrin_1f_param(&ctx->ac, "llvm.log2", 1900 ac_to_float_type(&ctx->ac, def_type), src[0]); 1901 break; 1902 case nir_op_frsq: 1903 result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", 1904 ac_to_float_type(&ctx->ac, def_type), src[0]); 1905 result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1, 1906 result); 1907 break; 1908 case nir_op_fpow: 1909 result = emit_intrin_2f_param(&ctx->ac, "llvm.pow", 1910 ac_to_float_type(&ctx->ac, def_type), src[0], src[1]); 1911 break; 1912 case nir_op_fmax: 1913 result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", 1914 ac_to_float_type(&ctx->ac, def_type), src[0], src[1]); 1915 if (instr->dest.dest.ssa.bit_size == 32) 1916 result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize", 1917 ac_to_float_type(&ctx->ac, def_type), 1918 result); 1919 break; 1920 case nir_op_fmin: 1921 result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum", 1922 ac_to_float_type(&ctx->ac, def_type), src[0], src[1]); 1923 if (instr->dest.dest.ssa.bit_size == 32) 1924 result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize", 1925 ac_to_float_type(&ctx->ac, def_type), 1926 result); 1927 break; 1928 case nir_op_ffma: 1929 result = emit_intrin_3f_param(&ctx->ac, "llvm.fmuladd", 1930 ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]); 1931 break; 1932 case nir_op_ibitfield_extract: 1933 result = emit_bitfield_extract(&ctx->ac, true, src); 1934 break; 1935 case nir_op_ubitfield_extract: 1936 result = emit_bitfield_extract(&ctx->ac, false, src); 1937 break; 1938 case nir_op_bitfield_insert: 1939 result = emit_bitfield_insert(&ctx->ac, src[0], src[1], src[2], src[3]); 1940 break; 1941 case nir_op_bitfield_reverse: 1942 result = ac_build_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE); 1943 break; 1944 case nir_op_bit_count: 1945 result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE); 1946 break; 1947 case nir_op_vec2: 1948 case nir_op_vec3: 1949 case nir_op_vec4: 1950 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) 1951 src[i] = ac_to_integer(&ctx->ac, src[i]); 1952 result = ac_build_gather_values(&ctx->ac, src, num_components); 1953 break; 1954 case nir_op_f2i32: 1955 case nir_op_f2i64: 1956 src[0] = ac_to_float(&ctx->ac, src[0]); 1957 result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, ""); 1958 break; 1959 case nir_op_f2u32: 1960 case nir_op_f2u64: 1961 src[0] = ac_to_float(&ctx->ac, src[0]); 1962 result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, ""); 1963 break; 1964 case nir_op_i2f32: 1965 case nir_op_i2f64: 1966 src[0] = ac_to_integer(&ctx->ac, src[0]); 1967 result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); 1968 break; 1969 case nir_op_u2f32: 1970 case nir_op_u2f64: 1971 src[0] = ac_to_integer(&ctx->ac, src[0]); 1972 result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); 1973 break; 1974 case nir_op_f2f64: 1975 src[0] = ac_to_float(&ctx->ac, src[0]); 1976 result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); 1977 break; 1978 case nir_op_f2f32: 1979 result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); 1980 break; 1981 case nir_op_u2u32: 1982 case nir_op_u2u64: 1983 src[0] = ac_to_integer(&ctx->ac, src[0]); 1984 if (get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->ac, def_type)) 1985 result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, ""); 1986 else 1987 result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, ""); 1988 break; 1989 case nir_op_i2i32: 1990 case nir_op_i2i64: 1991 src[0] = ac_to_integer(&ctx->ac, src[0]); 1992 if (get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->ac, def_type)) 1993 result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, ""); 1994 else 1995 result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, ""); 1996 break; 1997 case nir_op_bcsel: 1998 result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]); 1999 break; 2000 case nir_op_find_lsb: 2001 src[0] = ac_to_integer(&ctx->ac, src[0]); 2002 result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]); 2003 break; 2004 case nir_op_ufind_msb: 2005 src[0] = ac_to_integer(&ctx->ac, src[0]); 2006 result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32); 2007 break; 2008 case nir_op_ifind_msb: 2009 src[0] = ac_to_integer(&ctx->ac, src[0]); 2010 result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32); 2011 break; 2012 case nir_op_uadd_carry: 2013 src[0] = ac_to_integer(&ctx->ac, src[0]); 2014 src[1] = ac_to_integer(&ctx->ac, src[1]); 2015 result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]); 2016 break; 2017 case nir_op_usub_borrow: 2018 src[0] = ac_to_integer(&ctx->ac, src[0]); 2019 src[1] = ac_to_integer(&ctx->ac, src[1]); 2020 result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]); 2021 break; 2022 case nir_op_b2f: 2023 result = emit_b2f(&ctx->ac, src[0]); 2024 break; 2025 case nir_op_f2b: 2026 result = emit_f2b(&ctx->ac, src[0]); 2027 break; 2028 case nir_op_b2i: 2029 result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); 2030 break; 2031 case nir_op_i2b: 2032 src[0] = ac_to_integer(&ctx->ac, src[0]); 2033 result = emit_i2b(&ctx->ac, src[0]); 2034 break; 2035 case nir_op_fquantize2f16: 2036 result = emit_f2f16(ctx->nctx, src[0]); 2037 break; 2038 case nir_op_umul_high: 2039 src[0] = ac_to_integer(&ctx->ac, src[0]); 2040 src[1] = ac_to_integer(&ctx->ac, src[1]); 2041 result = emit_umul_high(&ctx->ac, src[0], src[1]); 2042 break; 2043 case nir_op_imul_high: 2044 src[0] = ac_to_integer(&ctx->ac, src[0]); 2045 src[1] = ac_to_integer(&ctx->ac, src[1]); 2046 result = emit_imul_high(&ctx->ac, src[0], src[1]); 2047 break; 2048 case nir_op_pack_half_2x16: 2049 result = emit_pack_half_2x16(&ctx->ac, src[0]); 2050 break; 2051 case nir_op_unpack_half_2x16: 2052 result = emit_unpack_half_2x16(&ctx->ac, src[0]); 2053 break; 2054 case nir_op_fddx: 2055 case nir_op_fddy: 2056 case nir_op_fddx_fine: 2057 case nir_op_fddy_fine: 2058 case nir_op_fddx_coarse: 2059 case nir_op_fddy_coarse: 2060 result = emit_ddxy(ctx, instr->op, src[0]); 2061 break; 2062 2063 case nir_op_unpack_64_2x32_split_x: { 2064 assert(instr->src[0].src.ssa->num_components == 1); 2065 LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], 2066 ctx->ac.v2i32, 2067 ""); 2068 result = LLVMBuildExtractElement(ctx->ac.builder, tmp, 2069 ctx->ac.i32_0, ""); 2070 break; 2071 } 2072 2073 case nir_op_unpack_64_2x32_split_y: { 2074 assert(instr->src[0].src.ssa->num_components == 1); 2075 LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], 2076 ctx->ac.v2i32, 2077 ""); 2078 result = LLVMBuildExtractElement(ctx->ac.builder, tmp, 2079 ctx->ac.i32_1, ""); 2080 break; 2081 } 2082 2083 case nir_op_pack_64_2x32_split: { 2084 LLVMValueRef tmp = LLVMGetUndef(ctx->ac.v2i32); 2085 tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp, 2086 src[0], ctx->ac.i32_0, ""); 2087 tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp, 2088 src[1], ctx->ac.i32_1, ""); 2089 result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, ""); 2090 break; 2091 } 2092 2093 default: 2094 fprintf(stderr, "Unknown NIR alu instr: "); 2095 nir_print_instr(&instr->instr, stderr); 2096 fprintf(stderr, "\n"); 2097 abort(); 2098 } 2099 2100 if (result) { 2101 assert(instr->dest.dest.is_ssa); 2102 result = ac_to_integer(&ctx->ac, result); 2103 _mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa, 2104 result); 2105 } 2106 } 2107 2108 static void visit_load_const(struct ac_nir_context *ctx, 2109 const nir_load_const_instr *instr) 2110 { 2111 LLVMValueRef values[4], value = NULL; 2112 LLVMTypeRef element_type = 2113 LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size); 2114 2115 for (unsigned i = 0; i < instr->def.num_components; ++i) { 2116 switch (instr->def.bit_size) { 2117 case 32: 2118 values[i] = LLVMConstInt(element_type, 2119 instr->value.u32[i], false); 2120 break; 2121 case 64: 2122 values[i] = LLVMConstInt(element_type, 2123 instr->value.u64[i], false); 2124 break; 2125 default: 2126 fprintf(stderr, 2127 "unsupported nir load_const bit_size: %d\n", 2128 instr->def.bit_size); 2129 abort(); 2130 } 2131 } 2132 if (instr->def.num_components > 1) { 2133 value = LLVMConstVector(values, instr->def.num_components); 2134 } else 2135 value = values[0]; 2136 2137 _mesa_hash_table_insert(ctx->defs, &instr->def, value); 2138 } 2139 2140 static LLVMValueRef cast_ptr(struct nir_to_llvm_context *ctx, LLVMValueRef ptr, 2141 LLVMTypeRef type) 2142 { 2143 int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); 2144 return LLVMBuildBitCast(ctx->builder, ptr, 2145 LLVMPointerType(type, addr_space), ""); 2146 } 2147 2148 static LLVMValueRef 2149 get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor, bool in_elements) 2150 { 2151 LLVMValueRef size = 2152 LLVMBuildExtractElement(ctx->ac.builder, descriptor, 2153 LLVMConstInt(ctx->ac.i32, 2, false), ""); 2154 2155 /* VI only */ 2156 if (ctx->ac.chip_class == VI && in_elements) { 2157 /* On VI, the descriptor contains the size in bytes, 2158 * but TXQ must return the size in elements. 2159 * The stride is always non-zero for resources using TXQ. 2160 */ 2161 LLVMValueRef stride = 2162 LLVMBuildExtractElement(ctx->ac.builder, descriptor, 2163 ctx->ac.i32_1, ""); 2164 stride = LLVMBuildLShr(ctx->ac.builder, stride, 2165 LLVMConstInt(ctx->ac.i32, 16, false), ""); 2166 stride = LLVMBuildAnd(ctx->ac.builder, stride, 2167 LLVMConstInt(ctx->ac.i32, 0x3fff, false), ""); 2168 2169 size = LLVMBuildUDiv(ctx->ac.builder, size, stride, ""); 2170 } 2171 return size; 2172 } 2173 2174 /** 2175 * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with 2176 * intrinsic names). 2177 */ 2178 static void build_int_type_name( 2179 LLVMTypeRef type, 2180 char *buf, unsigned bufsize) 2181 { 2182 assert(bufsize >= 6); 2183 2184 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) 2185 snprintf(buf, bufsize, "v%ui32", 2186 LLVMGetVectorSize(type)); 2187 else 2188 strcpy(buf, "i32"); 2189 } 2190 2191 static LLVMValueRef radv_lower_gather4_integer(struct ac_llvm_context *ctx, 2192 struct ac_image_args *args, 2193 const nir_tex_instr *instr) 2194 { 2195 enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type); 2196 LLVMValueRef coord = args->addr; 2197 LLVMValueRef half_texel[2]; 2198 LLVMValueRef compare_cube_wa = NULL; 2199 LLVMValueRef result; 2200 int c; 2201 unsigned coord_vgpr_index = (unsigned)args->offset + (unsigned)args->compare; 2202 2203 //TODO Rect 2204 { 2205 struct ac_image_args txq_args = { 0 }; 2206 2207 txq_args.da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE; 2208 txq_args.opcode = ac_image_get_resinfo; 2209 txq_args.dmask = 0xf; 2210 txq_args.addr = ctx->i32_0; 2211 txq_args.resource = args->resource; 2212 LLVMValueRef size = ac_build_image_opcode(ctx, &txq_args); 2213 2214 for (c = 0; c < 2; c++) { 2215 half_texel[c] = LLVMBuildExtractElement(ctx->builder, size, 2216 LLVMConstInt(ctx->i32, c, false), ""); 2217 half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, ""); 2218 half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]); 2219 half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c], 2220 LLVMConstReal(ctx->f32, -0.5), ""); 2221 } 2222 } 2223 2224 LLVMValueRef orig_coords = args->addr; 2225 2226 for (c = 0; c < 2; c++) { 2227 LLVMValueRef tmp; 2228 LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0); 2229 tmp = LLVMBuildExtractElement(ctx->builder, coord, index, ""); 2230 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, ""); 2231 tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], ""); 2232 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, ""); 2233 coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, ""); 2234 } 2235 2236 2237 /* 2238 * Apparantly cube has issue with integer types that the workaround doesn't solve, 2239 * so this tests if the format is 8_8_8_8 and an integer type do an alternate 2240 * workaround by sampling using a scaled type and converting. 2241 * This is taken from amdgpu-pro shaders. 2242 */ 2243 /* NOTE this produces some ugly code compared to amdgpu-pro, 2244 * LLVM ends up dumping SGPRs into VGPRs to deal with the compare/select, 2245 * and then reads them back. -pro generates two selects, 2246 * one s_cmp for the descriptor rewriting 2247 * one v_cmp for the coordinate and result changes. 2248 */ 2249 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { 2250 LLVMValueRef tmp, tmp2; 2251 2252 /* workaround 8/8/8/8 uint/sint cube gather bug */ 2253 /* first detect it then change to a scaled read and f2i */ 2254 tmp = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, ""); 2255 tmp2 = tmp; 2256 2257 /* extract the DATA_FORMAT */ 2258 tmp = ac_build_bfe(ctx, tmp, LLVMConstInt(ctx->i32, 20, false), 2259 LLVMConstInt(ctx->i32, 6, false), false); 2260 2261 /* is the DATA_FORMAT == 8_8_8_8 */ 2262 compare_cube_wa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tmp, LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), ""); 2263 2264 if (stype == GLSL_TYPE_UINT) 2265 /* Create a NUM FORMAT - 0x2 or 0x4 - USCALED or UINT */ 2266 tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0x8000000, false), 2267 LLVMConstInt(ctx->i32, 0x10000000, false), ""); 2268 else 2269 /* Create a NUM FORMAT - 0x3 or 0x5 - SSCALED or SINT */ 2270 tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0xc000000, false), 2271 LLVMConstInt(ctx->i32, 0x14000000, false), ""); 2272 2273 /* replace the NUM FORMAT in the descriptor */ 2274 tmp2 = LLVMBuildAnd(ctx->builder, tmp2, LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false), ""); 2275 tmp2 = LLVMBuildOr(ctx->builder, tmp2, tmp, ""); 2276 2277 args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32_1, ""); 2278 2279 /* don't modify the coordinates for this case */ 2280 coord = LLVMBuildSelect(ctx->builder, compare_cube_wa, orig_coords, coord, ""); 2281 } 2282 args->addr = coord; 2283 result = ac_build_image_opcode(ctx, args); 2284 2285 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { 2286 LLVMValueRef tmp, tmp2; 2287 2288 /* if the cube workaround is in place, f2i the result. */ 2289 for (c = 0; c < 4; c++) { 2290 tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), ""); 2291 if (stype == GLSL_TYPE_UINT) 2292 tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, ""); 2293 else 2294 tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, ""); 2295 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, ""); 2296 tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, ""); 2297 tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, tmp2, tmp, ""); 2298 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, ""); 2299 result = LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), ""); 2300 } 2301 } 2302 return result; 2303 } 2304 2305 static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, 2306 const nir_tex_instr *instr, 2307 bool lod_is_zero, 2308 struct ac_image_args *args) 2309 { 2310 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { 2311 if (ctx->abi->gfx9_stride_size_workaround) { 2312 return ac_build_buffer_load_format_gfx9_safe(&ctx->ac, 2313 args->resource, 2314 args->addr, 2315 ctx->ac.i32_0, 2316 true); 2317 } else { 2318 return ac_build_buffer_load_format(&ctx->ac, 2319 args->resource, 2320 args->addr, 2321 ctx->ac.i32_0, 2322 true); 2323 } 2324 } 2325 2326 args->opcode = ac_image_sample; 2327 args->compare = instr->is_shadow; 2328 2329 switch (instr->op) { 2330 case nir_texop_txf: 2331 case nir_texop_txf_ms: 2332 case nir_texop_samples_identical: 2333 args->opcode = lod_is_zero || 2334 instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? 2335 ac_image_load : ac_image_load_mip; 2336 args->compare = false; 2337 args->offset = false; 2338 break; 2339 case nir_texop_txb: 2340 args->bias = true; 2341 break; 2342 case nir_texop_txl: 2343 if (lod_is_zero) 2344 args->level_zero = true; 2345 else 2346 args->lod = true; 2347 break; 2348 case nir_texop_txs: 2349 case nir_texop_query_levels: 2350 args->opcode = ac_image_get_resinfo; 2351 break; 2352 case nir_texop_tex: 2353 if (ctx->stage != MESA_SHADER_FRAGMENT) 2354 args->level_zero = true; 2355 break; 2356 case nir_texop_txd: 2357 args->deriv = true; 2358 break; 2359 case nir_texop_tg4: 2360 args->opcode = ac_image_gather4; 2361 args->level_zero = true; 2362 break; 2363 case nir_texop_lod: 2364 args->opcode = ac_image_get_lod; 2365 args->compare = false; 2366 args->offset = false; 2367 break; 2368 default: 2369 break; 2370 } 2371 2372 if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= VI) { 2373 enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type); 2374 if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) { 2375 return radv_lower_gather4_integer(&ctx->ac, args, instr); 2376 } 2377 } 2378 return ac_build_image_opcode(&ctx->ac, args); 2379 } 2380 2381 static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx, 2382 nir_intrinsic_instr *instr) 2383 { 2384 LLVMValueRef index = get_src(ctx->nir, instr->src[0]); 2385 unsigned desc_set = nir_intrinsic_desc_set(instr); 2386 unsigned binding = nir_intrinsic_binding(instr); 2387 LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set]; 2388 struct radv_pipeline_layout *pipeline_layout = ctx->options->layout; 2389 struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout; 2390 unsigned base_offset = layout->binding[binding].offset; 2391 LLVMValueRef offset, stride; 2392 2393 if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || 2394 layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) { 2395 unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start + 2396 layout->binding[binding].dynamic_offset_offset; 2397 desc_ptr = ctx->push_constants; 2398 base_offset = pipeline_layout->push_constant_size + 16 * idx; 2399 stride = LLVMConstInt(ctx->ac.i32, 16, false); 2400 } else 2401 stride = LLVMConstInt(ctx->ac.i32, layout->binding[binding].size, false); 2402 2403 offset = LLVMConstInt(ctx->ac.i32, base_offset, false); 2404 index = LLVMBuildMul(ctx->builder, index, stride, ""); 2405 offset = LLVMBuildAdd(ctx->builder, offset, index, ""); 2406 2407 desc_ptr = ac_build_gep0(&ctx->ac, desc_ptr, offset); 2408 desc_ptr = cast_ptr(ctx, desc_ptr, ctx->ac.v4i32); 2409 LLVMSetMetadata(desc_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md); 2410 2411 return desc_ptr; 2412 } 2413 2414 static LLVMValueRef visit_vulkan_resource_reindex(struct nir_to_llvm_context *ctx, 2415 nir_intrinsic_instr *instr) 2416 { 2417 LLVMValueRef ptr = get_src(ctx->nir, instr->src[0]); 2418 LLVMValueRef index = get_src(ctx->nir, instr->src[1]); 2419 2420 LLVMValueRef result = LLVMBuildGEP(ctx->builder, ptr, &index, 1, ""); 2421 LLVMSetMetadata(result, ctx->ac.uniform_md_kind, ctx->ac.empty_md); 2422 return result; 2423 } 2424 2425 static LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx, 2426 nir_intrinsic_instr *instr) 2427 { 2428 LLVMValueRef ptr, addr; 2429 2430 addr = LLVMConstInt(ctx->ac.i32, nir_intrinsic_base(instr), 0); 2431 addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx->nir, instr->src[0]), ""); 2432 2433 ptr = ac_build_gep0(&ctx->ac, ctx->push_constants, addr); 2434 ptr = cast_ptr(ctx, ptr, get_def_type(ctx->nir, &instr->dest.ssa)); 2435 2436 return LLVMBuildLoad(ctx->builder, ptr, ""); 2437 } 2438 2439 static LLVMValueRef visit_get_buffer_size(struct ac_nir_context *ctx, 2440 const nir_intrinsic_instr *instr) 2441 { 2442 LLVMValueRef index = get_src(ctx, instr->src[0]); 2443 2444 return get_buffer_size(ctx, ctx->abi->load_ssbo(ctx->abi, index, false), false); 2445 } 2446 2447 static uint32_t widen_mask(uint32_t mask, unsigned multiplier) 2448 { 2449 uint32_t new_mask = 0; 2450 for(unsigned i = 0; i < 32 && (1u << i) <= mask; ++i) 2451 if (mask & (1u << i)) 2452 new_mask |= ((1u << multiplier) - 1u) << (i * multiplier); 2453 return new_mask; 2454 } 2455 2456 static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src, 2457 unsigned start, unsigned count) 2458 { 2459 LLVMTypeRef type = LLVMTypeOf(src); 2460 2461 if (LLVMGetTypeKind(type) != LLVMVectorTypeKind) { 2462 assert(start == 0); 2463 assert(count == 1); 2464 return src; 2465 } 2466 2467 unsigned src_elements = LLVMGetVectorSize(type); 2468 assert(start < src_elements); 2469 assert(start + count <= src_elements); 2470 2471 if (start == 0 && count == src_elements) 2472 return src; 2473 2474 if (count == 1) 2475 return LLVMBuildExtractElement(ctx->builder, src, LLVMConstInt(ctx->i32, start, false), ""); 2476 2477 assert(count <= 8); 2478 LLVMValueRef indices[8]; 2479 for (unsigned i = 0; i < count; ++i) 2480 indices[i] = LLVMConstInt(ctx->i32, start + i, false); 2481 2482 LLVMValueRef swizzle = LLVMConstVector(indices, count); 2483 return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, ""); 2484 } 2485 2486 static void visit_store_ssbo(struct ac_nir_context *ctx, 2487 nir_intrinsic_instr *instr) 2488 { 2489 const char *store_name; 2490 LLVMValueRef src_data = get_src(ctx, instr->src[0]); 2491 LLVMTypeRef data_type = ctx->ac.f32; 2492 int elem_size_mult = get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 32; 2493 int components_32bit = elem_size_mult * instr->num_components; 2494 unsigned writemask = nir_intrinsic_write_mask(instr); 2495 LLVMValueRef base_data, base_offset; 2496 LLVMValueRef params[6]; 2497 2498 params[1] = ctx->abi->load_ssbo(ctx->abi, 2499 get_src(ctx, instr->src[1]), true); 2500 params[2] = ctx->ac.i32_0; /* vindex */ 2501 params[4] = ctx->ac.i1false; /* glc */ 2502 params[5] = ctx->ac.i1false; /* slc */ 2503 2504 if (components_32bit > 1) 2505 data_type = LLVMVectorType(ctx->ac.f32, components_32bit); 2506 2507 writemask = widen_mask(writemask, elem_size_mult); 2508 2509 base_data = ac_to_float(&ctx->ac, src_data); 2510 base_data = trim_vector(&ctx->ac, base_data, instr->num_components); 2511 base_data = LLVMBuildBitCast(ctx->ac.builder, base_data, 2512 data_type, ""); 2513 base_offset = get_src(ctx, instr->src[2]); /* voffset */ 2514 while (writemask) { 2515 int start, count; 2516 LLVMValueRef data; 2517 LLVMValueRef offset; 2518 2519 u_bit_scan_consecutive_range(&writemask, &start, &count); 2520 2521 /* Due to an LLVM limitation, split 3-element writes 2522 * into a 2-element and a 1-element write. */ 2523 if (count == 3) { 2524 writemask |= 1 << (start + 2); 2525 count = 2; 2526 } 2527 2528 if (count > 4) { 2529 writemask |= ((1u << (count - 4)) - 1u) << (start + 4); 2530 count = 4; 2531 } 2532 2533 if (count == 4) { 2534 store_name = "llvm.amdgcn.buffer.store.v4f32"; 2535 } else if (count == 2) { 2536 store_name = "llvm.amdgcn.buffer.store.v2f32"; 2537 2538 } else { 2539 assert(count == 1); 2540 store_name = "llvm.amdgcn.buffer.store.f32"; 2541 } 2542 data = extract_vector_range(&ctx->ac, base_data, start, count); 2543 2544 offset = base_offset; 2545 if (start != 0) { 2546 offset = LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, start * 4, false), ""); 2547 } 2548 params[0] = data; 2549 params[3] = offset; 2550 ac_build_intrinsic(&ctx->ac, store_name, 2551 ctx->ac.voidt, params, 6, 0); 2552 } 2553 } 2554 2555 static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, 2556 const nir_intrinsic_instr *instr) 2557 { 2558 const char *name; 2559 LLVMValueRef params[6]; 2560 int arg_count = 0; 2561 2562 if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) { 2563 params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0); 2564 } 2565 params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0); 2566 params[arg_count++] = ctx->abi->load_ssbo(ctx->abi, 2567 get_src(ctx, instr->src[0]), 2568 true); 2569 params[arg_count++] = ctx->ac.i32_0; /* vindex */ 2570 params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */ 2571 params[arg_count++] = LLVMConstInt(ctx->ac.i1, 0, false); /* slc */ 2572 2573 switch (instr->intrinsic) { 2574 case nir_intrinsic_ssbo_atomic_add: 2575 name = "llvm.amdgcn.buffer.atomic.add"; 2576 break; 2577 case nir_intrinsic_ssbo_atomic_imin: 2578 name = "llvm.amdgcn.buffer.atomic.smin"; 2579 break; 2580 case nir_intrinsic_ssbo_atomic_umin: 2581 name = "llvm.amdgcn.buffer.atomic.umin"; 2582 break; 2583 case nir_intrinsic_ssbo_atomic_imax: 2584 name = "llvm.amdgcn.buffer.atomic.smax"; 2585 break; 2586 case nir_intrinsic_ssbo_atomic_umax: 2587 name = "llvm.amdgcn.buffer.atomic.umax"; 2588 break; 2589 case nir_intrinsic_ssbo_atomic_and: 2590 name = "llvm.amdgcn.buffer.atomic.and"; 2591 break; 2592 case nir_intrinsic_ssbo_atomic_or: 2593 name = "llvm.amdgcn.buffer.atomic.or"; 2594 break; 2595 case nir_intrinsic_ssbo_atomic_xor: 2596 name = "llvm.amdgcn.buffer.atomic.xor"; 2597 break; 2598 case nir_intrinsic_ssbo_atomic_exchange: 2599 name = "llvm.amdgcn.buffer.atomic.swap"; 2600 break; 2601 case nir_intrinsic_ssbo_atomic_comp_swap: 2602 name = "llvm.amdgcn.buffer.atomic.cmpswap"; 2603 break; 2604 default: 2605 abort(); 2606 } 2607 2608 return ac_build_intrinsic(&ctx->ac, name, ctx->ac.i32, params, arg_count, 0); 2609 } 2610 2611 static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, 2612 const nir_intrinsic_instr *instr) 2613 { 2614 LLVMValueRef results[2]; 2615 int load_components; 2616 int num_components = instr->num_components; 2617 if (instr->dest.ssa.bit_size == 64) 2618 num_components *= 2; 2619 2620 for (int i = 0; i < num_components; i += load_components) { 2621 load_components = MIN2(num_components - i, 4); 2622 const char *load_name; 2623 LLVMTypeRef data_type = ctx->ac.f32; 2624 LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * 4, false); 2625 offset = LLVMBuildAdd(ctx->ac.builder, get_src(ctx, instr->src[1]), offset, ""); 2626 2627 if (load_components == 3) 2628 data_type = LLVMVectorType(ctx->ac.f32, 4); 2629 else if (load_components > 1) 2630 data_type = LLVMVectorType(ctx->ac.f32, load_components); 2631 2632 if (load_components >= 3) 2633 load_name = "llvm.amdgcn.buffer.load.v4f32"; 2634 else if (load_components == 2) 2635 load_name = "llvm.amdgcn.buffer.load.v2f32"; 2636 else if (load_components == 1) 2637 load_name = "llvm.amdgcn.buffer.load.f32"; 2638 else 2639 unreachable("unhandled number of components"); 2640 2641 LLVMValueRef params[] = { 2642 ctx->abi->load_ssbo(ctx->abi, 2643 get_src(ctx, instr->src[0]), 2644 false), 2645 ctx->ac.i32_0, 2646 offset, 2647 ctx->ac.i1false, 2648 ctx->ac.i1false, 2649 }; 2650 2651 results[i > 0 ? 1 : 0] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0); 2652 } 2653 2654 assume(results[0]); 2655 LLVMValueRef ret = results[0]; 2656 if (num_components > 4 || num_components == 3) { 2657 LLVMValueRef masks[] = { 2658 LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false), 2659 LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false), 2660 LLVMConstInt(ctx->ac.i32, 4, false), LLVMConstInt(ctx->ac.i32, 5, false), 2661 LLVMConstInt(ctx->ac.i32, 6, false), LLVMConstInt(ctx->ac.i32, 7, false) 2662 }; 2663 2664 LLVMValueRef swizzle = LLVMConstVector(masks, num_components); 2665 ret = LLVMBuildShuffleVector(ctx->ac.builder, results[0], 2666 results[num_components > 4 ? 1 : 0], swizzle, ""); 2667 } 2668 2669 return LLVMBuildBitCast(ctx->ac.builder, ret, 2670 get_def_type(ctx, &instr->dest.ssa), ""); 2671 } 2672 2673 static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, 2674 const nir_intrinsic_instr *instr) 2675 { 2676 LLVMValueRef ret; 2677 LLVMValueRef rsrc = get_src(ctx, instr->src[0]); 2678 LLVMValueRef offset = get_src(ctx, instr->src[1]); 2679 int num_components = instr->num_components; 2680 2681 if (ctx->abi->load_ubo) 2682 rsrc = ctx->abi->load_ubo(ctx->abi, rsrc); 2683 2684 if (instr->dest.ssa.bit_size == 64) 2685 num_components *= 2; 2686 2687 ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, 2688 NULL, 0, false, false, true, true); 2689 ret = trim_vector(&ctx->ac, ret, num_components); 2690 return LLVMBuildBitCast(ctx->ac.builder, ret, 2691 get_def_type(ctx, &instr->dest.ssa), ""); 2692 } 2693 2694 static void 2695 get_deref_offset(struct ac_nir_context *ctx, nir_deref_var *deref, 2696 bool vs_in, unsigned *vertex_index_out, 2697 LLVMValueRef *vertex_index_ref, 2698 unsigned *const_out, LLVMValueRef *indir_out) 2699 { 2700 unsigned const_offset = 0; 2701 nir_deref *tail = &deref->deref; 2702 LLVMValueRef offset = NULL; 2703 2704 if (vertex_index_out != NULL || vertex_index_ref != NULL) { 2705 tail = tail->child; 2706 nir_deref_array *deref_array = nir_deref_as_array(tail); 2707 if (vertex_index_out) 2708 *vertex_index_out = deref_array->base_offset; 2709 2710 if (vertex_index_ref) { 2711 LLVMValueRef vtx = LLVMConstInt(ctx->ac.i32, deref_array->base_offset, false); 2712 if (deref_array->deref_array_type == nir_deref_array_type_indirect) { 2713 vtx = LLVMBuildAdd(ctx->ac.builder, vtx, get_src(ctx, deref_array->indirect), ""); 2714 } 2715 *vertex_index_ref = vtx; 2716 } 2717 } 2718 2719 if (deref->var->data.compact) { 2720 assert(tail->child->deref_type == nir_deref_type_array); 2721 assert(glsl_type_is_scalar(glsl_without_array(deref->var->type))); 2722 nir_deref_array *deref_array = nir_deref_as_array(tail->child); 2723 /* We always lower indirect dereferences for "compact" array vars. */ 2724 assert(deref_array->deref_array_type == nir_deref_array_type_direct); 2725 2726 const_offset = deref_array->base_offset; 2727 goto out; 2728 } 2729 2730 while (tail->child != NULL) { 2731 const struct glsl_type *parent_type = tail->type; 2732 tail = tail->child; 2733 2734 if (tail->deref_type == nir_deref_type_array) { 2735 nir_deref_array *deref_array = nir_deref_as_array(tail); 2736 LLVMValueRef index, stride, local_offset; 2737 unsigned size = glsl_count_attribute_slots(tail->type, vs_in); 2738 2739 const_offset += size * deref_array->base_offset; 2740 if (deref_array->deref_array_type == nir_deref_array_type_direct) 2741 continue; 2742 2743 assert(deref_array->deref_array_type == nir_deref_array_type_indirect); 2744 index = get_src(ctx, deref_array->indirect); 2745 stride = LLVMConstInt(ctx->ac.i32, size, 0); 2746 local_offset = LLVMBuildMul(ctx->ac.builder, stride, index, ""); 2747 2748 if (offset) 2749 offset = LLVMBuildAdd(ctx->ac.builder, offset, local_offset, ""); 2750 else 2751 offset = local_offset; 2752 } else if (tail->deref_type == nir_deref_type_struct) { 2753 nir_deref_struct *deref_struct = nir_deref_as_struct(tail); 2754 2755 for (unsigned i = 0; i < deref_struct->index; i++) { 2756 const struct glsl_type *ft = glsl_get_struct_field(parent_type, i); 2757 const_offset += glsl_count_attribute_slots(ft, vs_in); 2758 } 2759 } else 2760 unreachable("unsupported deref type"); 2761 2762 } 2763 out: 2764 if (const_offset && offset) 2765 offset = LLVMBuildAdd(ctx->ac.builder, offset, 2766 LLVMConstInt(ctx->ac.i32, const_offset, 0), 2767 ""); 2768 2769 *const_out = const_offset; 2770 *indir_out = offset; 2771 } 2772 2773 2774 /* The offchip buffer layout for TCS->TES is 2775 * 2776 * - attribute 0 of patch 0 vertex 0 2777 * - attribute 0 of patch 0 vertex 1 2778 * - attribute 0 of patch 0 vertex 2 2779 * ... 2780 * - attribute 0 of patch 1 vertex 0 2781 * - attribute 0 of patch 1 vertex 1 2782 * ... 2783 * - attribute 1 of patch 0 vertex 0 2784 * - attribute 1 of patch 0 vertex 1 2785 * ... 2786 * - per patch attribute 0 of patch 0 2787 * - per patch attribute 0 of patch 1 2788 * ... 2789 * 2790 * Note that every attribute has 4 components. 2791 */ 2792 static LLVMValueRef get_tcs_tes_buffer_address(struct nir_to_llvm_context *ctx, 2793 LLVMValueRef vertex_index, 2794 LLVMValueRef param_index) 2795 { 2796 LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices; 2797 LLVMValueRef param_stride, constant16; 2798 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); 2799 2800 vertices_per_patch = unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 9, 6); 2801 num_patches = unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 0, 9); 2802 total_vertices = LLVMBuildMul(ctx->builder, vertices_per_patch, 2803 num_patches, ""); 2804 2805 constant16 = LLVMConstInt(ctx->ac.i32, 16, false); 2806 if (vertex_index) { 2807 base_addr = LLVMBuildMul(ctx->builder, rel_patch_id, 2808 vertices_per_patch, ""); 2809 2810 base_addr = LLVMBuildAdd(ctx->builder, base_addr, 2811 vertex_index, ""); 2812 2813 param_stride = total_vertices; 2814 } else { 2815 base_addr = rel_patch_id; 2816 param_stride = num_patches; 2817 } 2818 2819 base_addr = LLVMBuildAdd(ctx->builder, base_addr, 2820 LLVMBuildMul(ctx->builder, param_index, 2821 param_stride, ""), ""); 2822 2823 base_addr = LLVMBuildMul(ctx->builder, base_addr, constant16, ""); 2824 2825 if (!vertex_index) { 2826 LLVMValueRef patch_data_offset = 2827 unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 16, 16); 2828 2829 base_addr = LLVMBuildAdd(ctx->builder, base_addr, 2830 patch_data_offset, ""); 2831 } 2832 return base_addr; 2833 } 2834 2835 static LLVMValueRef get_tcs_tes_buffer_address_params(struct nir_to_llvm_context *ctx, 2836 unsigned param, 2837 unsigned const_index, 2838 bool is_compact, 2839 LLVMValueRef vertex_index, 2840 LLVMValueRef indir_index) 2841 { 2842 LLVMValueRef param_index; 2843 2844 if (indir_index) 2845 param_index = LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->ac.i32, param, false), 2846 indir_index, ""); 2847 else { 2848 if (const_index && !is_compact) 2849 param += const_index; 2850 param_index = LLVMConstInt(ctx->ac.i32, param, false); 2851 } 2852 return get_tcs_tes_buffer_address(ctx, vertex_index, param_index); 2853 } 2854 2855 static void 2856 mark_tess_output(struct nir_to_llvm_context *ctx, 2857 bool is_patch, uint32_t param, int num_slots) 2858 2859 { 2860 uint64_t slot_mask = (1ull << num_slots) - 1; 2861 if (is_patch) { 2862 ctx->tess_patch_outputs_written |= (slot_mask << param); 2863 } else 2864 ctx->tess_outputs_written |= (slot_mask<< param); 2865 } 2866 2867 static LLVMValueRef 2868 get_dw_address(struct nir_to_llvm_context *ctx, 2869 LLVMValueRef dw_addr, 2870 unsigned param, 2871 unsigned const_index, 2872 bool compact_const_index, 2873 LLVMValueRef vertex_index, 2874 LLVMValueRef stride, 2875 LLVMValueRef indir_index) 2876 2877 { 2878 2879 if (vertex_index) { 2880 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, 2881 LLVMBuildMul(ctx->builder, 2882 vertex_index, 2883 stride, ""), ""); 2884 } 2885 2886 if (indir_index) 2887 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, 2888 LLVMBuildMul(ctx->builder, indir_index, 2889 LLVMConstInt(ctx->ac.i32, 4, false), ""), ""); 2890 else if (const_index && !compact_const_index) 2891 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, 2892 LLVMConstInt(ctx->ac.i32, const_index * 4, false), ""); 2893 2894 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, 2895 LLVMConstInt(ctx->ac.i32, param * 4, false), ""); 2896 2897 if (const_index && compact_const_index) 2898 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, 2899 LLVMConstInt(ctx->ac.i32, const_index, false), ""); 2900 return dw_addr; 2901 } 2902 2903 static LLVMValueRef 2904 load_tcs_varyings(struct ac_shader_abi *abi, 2905 LLVMValueRef vertex_index, 2906 LLVMValueRef indir_index, 2907 unsigned const_index, 2908 unsigned location, 2909 unsigned driver_location, 2910 unsigned component, 2911 unsigned num_components, 2912 bool is_patch, 2913 bool is_compact, 2914 bool load_input) 2915 { 2916 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); 2917 LLVMValueRef dw_addr, stride; 2918 LLVMValueRef value[4], result; 2919 unsigned param = shader_io_get_unique_index(location); 2920 2921 if (load_input) { 2922 stride = unpack_param(&ctx->ac, ctx->tcs_in_layout, 13, 8); 2923 dw_addr = get_tcs_in_current_patch_offset(ctx); 2924 } else { 2925 if (!is_patch) { 2926 stride = unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8); 2927 dw_addr = get_tcs_out_current_patch_offset(ctx); 2928 } else { 2929 dw_addr = get_tcs_out_current_patch_data_offset(ctx); 2930 stride = NULL; 2931 } 2932 } 2933 2934 dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride, 2935 indir_index); 2936 2937 for (unsigned i = 0; i < num_components + component; i++) { 2938 value[i] = ac_lds_load(&ctx->ac, dw_addr); 2939 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, 2940 ctx->ac.i32_1, ""); 2941 } 2942 result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component); 2943 return result; 2944 } 2945 2946 static void 2947 store_tcs_output(struct ac_shader_abi *abi, 2948 const nir_variable *var, 2949 LLVMValueRef vertex_index, 2950 LLVMValueRef param_index, 2951 unsigned const_index, 2952 LLVMValueRef src, 2953 unsigned writemask) 2954 { 2955 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); 2956 const unsigned location = var->data.location; 2957 const unsigned component = var->data.location_frac; 2958 const bool is_patch = var->data.patch; 2959 const bool is_compact = var->data.compact; 2960 const unsigned count = glsl_count_attribute_slots(var->type, false); 2961 LLVMValueRef dw_addr; 2962 LLVMValueRef stride = NULL; 2963 LLVMValueRef buf_addr = NULL; 2964 unsigned param; 2965 bool store_lds = true; 2966 2967 if (is_patch) { 2968 if (!(ctx->tcs_patch_outputs_read & (1U << (location - VARYING_SLOT_PATCH0)))) 2969 store_lds = false; 2970 } else { 2971 if (!(ctx->tcs_outputs_read & (1ULL << location))) 2972 store_lds = false; 2973 } 2974 2975 param = shader_io_get_unique_index(location); 2976 if (location == VARYING_SLOT_CLIP_DIST0 && 2977 is_compact && const_index > 3) { 2978 const_index -= 3; 2979 param++; 2980 } 2981 2982 if (!is_patch) { 2983 stride = unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8); 2984 dw_addr = get_tcs_out_current_patch_offset(ctx); 2985 } else { 2986 dw_addr = get_tcs_out_current_patch_data_offset(ctx); 2987 } 2988 2989 if (param_index) 2990 mark_tess_output(ctx, is_patch, param, count); 2991 else 2992 mark_tess_output(ctx, is_patch, param, 1); 2993 2994 dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride, 2995 param_index); 2996 buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, is_compact, 2997 vertex_index, param_index); 2998 2999 bool is_tess_factor = false; 3000 if (location == VARYING_SLOT_TESS_LEVEL_INNER || 3001 location == VARYING_SLOT_TESS_LEVEL_OUTER) 3002 is_tess_factor = true; 3003 3004 unsigned base = is_compact ? const_index : 0; 3005 for (unsigned chan = 0; chan < 8; chan++) { 3006 if (!(writemask & (1 << chan))) 3007 continue; 3008 LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component); 3009 3010 if (store_lds || is_tess_factor) { 3011 LLVMValueRef dw_addr_chan = 3012 LLVMBuildAdd(ctx->builder, dw_addr, 3013 LLVMConstInt(ctx->ac.i32, chan, false), ""); 3014 ac_lds_store(&ctx->ac, dw_addr_chan, value); 3015 } 3016 3017 if (!is_tess_factor && writemask != 0xF) 3018 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1, 3019 buf_addr, ctx->oc_lds, 3020 4 * (base + chan), 1, 0, true, false); 3021 } 3022 3023 if (writemask == 0xF) { 3024 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, src, 4, 3025 buf_addr, ctx->oc_lds, 3026 (base * 4), 1, 0, true, false); 3027 } 3028 } 3029 3030 static LLVMValueRef 3031 load_tes_input(struct ac_shader_abi *abi, 3032 LLVMValueRef vertex_index, 3033 LLVMValueRef param_index, 3034 unsigned const_index, 3035 unsigned location, 3036 unsigned driver_location, 3037 unsigned component, 3038 unsigned num_components, 3039 bool is_patch, 3040 bool is_compact, 3041 bool load_input) 3042 { 3043 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); 3044 LLVMValueRef buf_addr; 3045 LLVMValueRef result; 3046 unsigned param = shader_io_get_unique_index(location); 3047 3048 if (location == VARYING_SLOT_CLIP_DIST0 && is_compact && const_index > 3) { 3049 const_index -= 3; 3050 param++; 3051 } 3052 3053 buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, 3054 is_compact, vertex_index, param_index); 3055 3056 LLVMValueRef comp_offset = LLVMConstInt(ctx->ac.i32, component * 4, false); 3057 buf_addr = LLVMBuildAdd(ctx->builder, buf_addr, comp_offset, ""); 3058 3059 result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, num_components, NULL, 3060 buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false); 3061 result = trim_vector(&ctx->ac, result, num_components); 3062 return result; 3063 } 3064 3065 static LLVMValueRef 3066 load_gs_input(struct ac_shader_abi *abi, 3067 unsigned location, 3068 unsigned driver_location, 3069 unsigned component, 3070 unsigned num_components, 3071 unsigned vertex_index, 3072 unsigned const_index, 3073 LLVMTypeRef type) 3074 { 3075 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); 3076 LLVMValueRef vtx_offset; 3077 LLVMValueRef args[9]; 3078 unsigned param, vtx_offset_param; 3079 LLVMValueRef value[4], result; 3080 3081 vtx_offset_param = vertex_index; 3082 assert(vtx_offset_param < 6); 3083 vtx_offset = LLVMBuildMul(ctx->builder, ctx->gs_vtx_offset[vtx_offset_param], 3084 LLVMConstInt(ctx->ac.i32, 4, false), ""); 3085 3086 param = shader_io_get_unique_index(location); 3087 3088 for (unsigned i = component; i < num_components + component; i++) { 3089 if (ctx->ac.chip_class >= GFX9) { 3090 LLVMValueRef dw_addr = ctx->gs_vtx_offset[vtx_offset_param]; 3091 dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, 3092 LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), ""); 3093 value[i] = ac_lds_load(&ctx->ac, dw_addr); 3094 } else { 3095 args[0] = ctx->esgs_ring; 3096 args[1] = vtx_offset; 3097 args[2] = LLVMConstInt(ctx->ac.i32, (param * 4 + i + const_index) * 256, false); 3098 args[3] = ctx->ac.i32_0; 3099 args[4] = ctx->ac.i32_1; /* OFFEN */ 3100 args[5] = ctx->ac.i32_0; /* IDXEN */ 3101 args[6] = ctx->ac.i32_1; /* GLC */ 3102 args[7] = ctx->ac.i32_0; /* SLC */ 3103 args[8] = ctx->ac.i32_0; /* TFE */ 3104 3105 value[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32", 3106 ctx->ac.i32, args, 9, 3107 AC_FUNC_ATTR_READONLY | 3108 AC_FUNC_ATTR_LEGACY); 3109 } 3110 } 3111 result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component); 3112 3113 return result; 3114 } 3115 3116 static LLVMValueRef 3117 build_gep_for_deref(struct ac_nir_context *ctx, 3118 nir_deref_var *deref) 3119 { 3120 struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, deref->var); 3121 assert(entry->data); 3122 LLVMValueRef val = entry->data; 3123 nir_deref *tail = deref->deref.child; 3124 while (tail != NULL) { 3125 LLVMValueRef offset; 3126 switch (tail->deref_type) { 3127 case nir_deref_type_array: { 3128 nir_deref_array *array = nir_deref_as_array(tail); 3129 offset = LLVMConstInt(ctx->ac.i32, array->base_offset, 0); 3130 if (array->deref_array_type == 3131 nir_deref_array_type_indirect) { 3132 offset = LLVMBuildAdd(ctx->ac.builder, offset, 3133 get_src(ctx, 3134 array->indirect), 3135 ""); 3136 } 3137 break; 3138 } 3139 case nir_deref_type_struct: { 3140 nir_deref_struct *deref_struct = 3141 nir_deref_as_struct(tail); 3142 offset = LLVMConstInt(ctx->ac.i32, 3143 deref_struct->index, 0); 3144 break; 3145 } 3146 default: 3147 unreachable("bad deref type"); 3148 } 3149 val = ac_build_gep0(&ctx->ac, val, offset); 3150 tail = tail->child; 3151 } 3152 return val; 3153 } 3154 3155 static LLVMValueRef load_tess_varyings(struct ac_nir_context *ctx, 3156 nir_intrinsic_instr *instr, 3157 bool load_inputs) 3158 { 3159 LLVMValueRef result; 3160 LLVMValueRef vertex_index = NULL; 3161 LLVMValueRef indir_index = NULL; 3162 unsigned const_index = 0; 3163 unsigned location = instr->variables[0]->var->data.location; 3164 unsigned driver_location = instr->variables[0]->var->data.driver_location; 3165 const bool is_patch = instr->variables[0]->var->data.patch; 3166 const bool is_compact = instr->variables[0]->var->data.compact; 3167 3168 get_deref_offset(ctx, instr->variables[0], 3169 false, NULL, is_patch ? NULL : &vertex_index, 3170 &const_index, &indir_index); 3171 3172 result = ctx->abi->load_tess_varyings(ctx->abi, vertex_index, indir_index, 3173 const_index, location, driver_location, 3174 instr->variables[0]->var->data.location_frac, 3175 instr->num_components, 3176 is_patch, is_compact, load_inputs); 3177 return LLVMBuildBitCast(ctx->ac.builder, result, get_def_type(ctx, &instr->dest.ssa), ""); 3178 } 3179 3180 static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, 3181 nir_intrinsic_instr *instr) 3182 { 3183 LLVMValueRef values[8]; 3184 int idx = instr->variables[0]->var->data.driver_location; 3185 int ve = instr->dest.ssa.num_components; 3186 unsigned comp = instr->variables[0]->var->data.location_frac; 3187 LLVMValueRef indir_index; 3188 LLVMValueRef ret; 3189 unsigned const_index; 3190 unsigned stride = instr->variables[0]->var->data.compact ? 1 : 4; 3191 bool vs_in = ctx->stage == MESA_SHADER_VERTEX && 3192 instr->variables[0]->var->data.mode == nir_var_shader_in; 3193 get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL, 3194 &const_index, &indir_index); 3195 3196 if (instr->dest.ssa.bit_size == 64) 3197 ve *= 2; 3198 3199 switch (instr->variables[0]->var->data.mode) { 3200 case nir_var_shader_in: 3201 if (ctx->stage == MESA_SHADER_TESS_CTRL || 3202 ctx->stage == MESA_SHADER_TESS_EVAL) { 3203 return load_tess_varyings(ctx, instr, true); 3204 } 3205 3206 if (ctx->stage == MESA_SHADER_GEOMETRY) { 3207 LLVMValueRef indir_index; 3208 unsigned const_index, vertex_index; 3209 get_deref_offset(ctx, instr->variables[0], 3210 false, &vertex_index, NULL, 3211 &const_index, &indir_index); 3212 return ctx->abi->load_inputs(ctx->abi, instr->variables[0]->var->data.location, 3213 instr->variables[0]->var->data.driver_location, 3214 instr->variables[0]->var->data.location_frac, ve, 3215 vertex_index, const_index, 3216 nir2llvmtype(ctx, instr->variables[0]->var->type)); 3217 } 3218 3219 for (unsigned chan = comp; chan < ve + comp; chan++) { 3220 if (indir_index) { 3221 unsigned count = glsl_count_attribute_slots( 3222 instr->variables[0]->var->type, 3223 ctx->stage == MESA_SHADER_VERTEX); 3224 count -= chan / 4; 3225 LLVMValueRef tmp_vec = ac_build_gather_values_extended( 3226 &ctx->ac, ctx->abi->inputs + idx + chan, count, 3227 stride, false, true); 3228 3229 values[chan] = LLVMBuildExtractElement(ctx->ac.builder, 3230 tmp_vec, 3231 indir_index, ""); 3232 } else 3233 values[chan] = ctx->abi->inputs[idx + chan + const_index * stride]; 3234 } 3235 break; 3236 case nir_var_local: 3237 for (unsigned chan = 0; chan < ve; chan++) { 3238 if (indir_index) { 3239 unsigned count = glsl_count_attribute_slots( 3240 instr->variables[0]->var->type, false); 3241 count -= chan / 4; 3242 LLVMValueRef tmp_vec = ac_build_gather_values_extended( 3243 &ctx->ac, ctx->locals + idx + chan, count, 3244 stride, true, true); 3245 3246 values[chan] = LLVMBuildExtractElement(ctx->ac.builder, 3247 tmp_vec, 3248 indir_index, ""); 3249 } else { 3250 values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], ""); 3251 } 3252 } 3253 break; 3254 case nir_var_shared: { 3255 LLVMValueRef address = build_gep_for_deref(ctx, 3256 instr->variables[0]); 3257 LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, ""); 3258 return LLVMBuildBitCast(ctx->ac.builder, val, 3259 get_def_type(ctx, &instr->dest.ssa), 3260 ""); 3261 } 3262 case nir_var_shader_out: 3263 if (ctx->stage == MESA_SHADER_TESS_CTRL) { 3264 return load_tess_varyings(ctx, instr, false); 3265 } 3266 3267 for (unsigned chan = comp; chan < ve + comp; chan++) { 3268 if (indir_index) { 3269 unsigned count = glsl_count_attribute_slots( 3270 instr->variables[0]->var->type, false); 3271 count -= chan / 4; 3272 LLVMValueRef tmp_vec = ac_build_gather_values_extended( 3273 &ctx->ac, ctx->outputs + idx + chan, count, 3274 stride, true, true); 3275 3276 values[chan] = LLVMBuildExtractElement(ctx->ac.builder, 3277 tmp_vec, 3278 indir_index, ""); 3279 } else { 3280 values[chan] = LLVMBuildLoad(ctx->ac.builder, 3281 ctx->outputs[idx + chan + const_index * stride], 3282 ""); 3283 } 3284 } 3285 break; 3286 default: 3287 unreachable("unhandle variable mode"); 3288 } 3289 ret = ac_build_varying_gather_values(&ctx->ac, values, ve, comp); 3290 return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), ""); 3291 } 3292 3293 static void 3294 visit_store_var(struct ac_nir_context *ctx, 3295 nir_intrinsic_instr *instr) 3296 { 3297 LLVMValueRef temp_ptr, value; 3298 int idx = instr->variables[0]->var->data.driver_location; 3299 unsigned comp = instr->variables[0]->var->data.location_frac; 3300 LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0])); 3301 int writemask = instr->const_index[0] << comp; 3302 LLVMValueRef indir_index; 3303 unsigned const_index; 3304 get_deref_offset(ctx, instr->variables[0], false, 3305 NULL, NULL, &const_index, &indir_index); 3306 3307 if (get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) { 3308 3309 src = LLVMBuildBitCast(ctx->ac.builder, src, 3310 LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2), 3311 ""); 3312 3313 writemask = widen_mask(writemask, 2); 3314 } 3315 3316 switch (instr->variables[0]->var->data.mode) { 3317 case nir_var_shader_out: 3318 3319 if (ctx->stage == MESA_SHADER_TESS_CTRL) { 3320 LLVMValueRef vertex_index = NULL; 3321 LLVMValueRef indir_index = NULL; 3322 unsigned const_index = 0; 3323 const bool is_patch = instr->variables[0]->var->data.patch; 3324 3325 get_deref_offset(ctx, instr->variables[0], 3326 false, NULL, is_patch ? NULL : &vertex_index, 3327 &const_index, &indir_index); 3328 3329 ctx->abi->store_tcs_outputs(ctx->abi, instr->variables[0]->var, 3330 vertex_index, indir_index, 3331 const_index, src, writemask); 3332 return; 3333 } 3334 3335 for (unsigned chan = 0; chan < 8; chan++) { 3336 int stride = 4; 3337 if (!(writemask & (1 << chan))) 3338 continue; 3339 3340 value = ac_llvm_extract_elem(&ctx->ac, src, chan - comp); 3341 3342 if (instr->variables[0]->var->data.compact) 3343 stride = 1; 3344 if (indir_index) { 3345 unsigned count = glsl_count_attribute_slots( 3346 instr->variables[0]->var->type, false); 3347 count -= chan / 4; 3348 LLVMValueRef tmp_vec = ac_build_gather_values_extended( 3349 &ctx->ac, ctx->outputs + idx + chan, count, 3350 stride, true, true); 3351 3352 tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec, 3353 value, indir_index, ""); 3354 build_store_values_extended(&ctx->ac, ctx->outputs + idx + chan, 3355 count, stride, tmp_vec); 3356 3357 } else { 3358 temp_ptr = ctx->outputs[idx + chan + const_index * stride]; 3359 3360 LLVMBuildStore(ctx->ac.builder, value, temp_ptr); 3361 } 3362 } 3363 break; 3364 case nir_var_local: 3365 for (unsigned chan = 0; chan < 8; chan++) { 3366 if (!(writemask & (1 << chan))) 3367 continue; 3368 3369 value = ac_llvm_extract_elem(&ctx->ac, src, chan); 3370 if (indir_index) { 3371 unsigned count = glsl_count_attribute_slots( 3372 instr->variables[0]->var->type, false); 3373 count -= chan / 4; 3374 LLVMValueRef tmp_vec = ac_build_gather_values_extended( 3375 &ctx->ac, ctx->locals + idx + chan, count, 3376 4, true, true); 3377 3378 tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec, 3379 value, indir_index, ""); 3380 build_store_values_extended(&ctx->ac, ctx->locals + idx + chan, 3381 count, 4, tmp_vec); 3382 } else { 3383 temp_ptr = ctx->locals[idx + chan + const_index * 4]; 3384 3385 LLVMBuildStore(ctx->ac.builder, value, temp_ptr); 3386 } 3387 } 3388 break; 3389 case nir_var_shared: { 3390 int writemask = instr->const_index[0]; 3391 LLVMValueRef address = build_gep_for_deref(ctx, 3392 instr->variables[0]); 3393 LLVMValueRef val = get_src(ctx, instr->src[0]); 3394 unsigned components = 3395 glsl_get_vector_elements( 3396 nir_deref_tail(&instr->variables[0]->deref)->type); 3397 if (writemask == (1 << components) - 1) { 3398 val = LLVMBuildBitCast( 3399 ctx->ac.builder, val, 3400 LLVMGetElementType(LLVMTypeOf(address)), ""); 3401 LLVMBuildStore(ctx->ac.builder, val, address); 3402 } else { 3403 for (unsigned chan = 0; chan < 4; chan++) { 3404 if (!(writemask & (1 << chan))) 3405 continue; 3406 LLVMValueRef ptr = 3407 LLVMBuildStructGEP(ctx->ac.builder, 3408 address, chan, ""); 3409 LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val, 3410 chan); 3411 src = LLVMBuildBitCast( 3412 ctx->ac.builder, src, 3413 LLVMGetElementType(LLVMTypeOf(ptr)), ""); 3414 LLVMBuildStore(ctx->ac.builder, src, ptr); 3415 } 3416 } 3417 break; 3418 } 3419 default: 3420 break; 3421 } 3422 } 3423 3424 static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array) 3425 { 3426 switch (dim) { 3427 case GLSL_SAMPLER_DIM_BUF: 3428 return 1; 3429 case GLSL_SAMPLER_DIM_1D: 3430 return array ? 2 : 1; 3431 case GLSL_SAMPLER_DIM_2D: 3432 return array ? 3 : 2; 3433 case GLSL_SAMPLER_DIM_MS: 3434 return array ? 4 : 3; 3435 case GLSL_SAMPLER_DIM_3D: 3436 case GLSL_SAMPLER_DIM_CUBE: 3437 return 3; 3438 case GLSL_SAMPLER_DIM_RECT: 3439 case GLSL_SAMPLER_DIM_SUBPASS: 3440 return 2; 3441 case GLSL_SAMPLER_DIM_SUBPASS_MS: 3442 return 3; 3443 default: 3444 break; 3445 } 3446 return 0; 3447 } 3448 3449 3450 3451 /* Adjust the sample index according to FMASK. 3452 * 3453 * For uncompressed MSAA surfaces, FMASK should return 0x76543210, 3454 * which is the identity mapping. Each nibble says which physical sample 3455 * should be fetched to get that sample. 3456 * 3457 * For example, 0x11111100 means there are only 2 samples stored and 3458 * the second sample covers 3/4 of the pixel. When reading samples 0 3459 * and 1, return physical sample 0 (determined by the first two 0s 3460 * in FMASK), otherwise return physical sample 1. 3461 * 3462 * The sample index should be adjusted as follows: 3463 * sample_index = (fmask >> (sample_index * 4)) & 0xF; 3464 */ 3465 static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx, 3466 LLVMValueRef coord_x, LLVMValueRef coord_y, 3467 LLVMValueRef coord_z, 3468 LLVMValueRef sample_index, 3469 LLVMValueRef fmask_desc_ptr) 3470 { 3471 LLVMValueRef fmask_load_address[4]; 3472 LLVMValueRef res; 3473 3474 fmask_load_address[0] = coord_x; 3475 fmask_load_address[1] = coord_y; 3476 if (coord_z) { 3477 fmask_load_address[2] = coord_z; 3478 fmask_load_address[3] = LLVMGetUndef(ctx->i32); 3479 } 3480 3481 struct ac_image_args args = {0}; 3482 3483 args.opcode = ac_image_load; 3484 args.da = coord_z ? true : false; 3485 args.resource = fmask_desc_ptr; 3486 args.dmask = 0xf; 3487 args.addr = ac_build_gather_values(ctx, fmask_load_address, coord_z ? 4 : 2); 3488 3489 res = ac_build_image_opcode(ctx, &args); 3490 3491 res = ac_to_integer(ctx, res); 3492 LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false); 3493 LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false); 3494 3495 LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder, 3496 res, 3497 ctx->i32_0, ""); 3498 3499 LLVMValueRef sample_index4 = 3500 LLVMBuildMul(ctx->builder, sample_index, four, ""); 3501 LLVMValueRef shifted_fmask = 3502 LLVMBuildLShr(ctx->builder, fmask, sample_index4, ""); 3503 LLVMValueRef final_sample = 3504 LLVMBuildAnd(ctx->builder, shifted_fmask, F, ""); 3505 3506 /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK 3507 * resource descriptor is 0 (invalid), 3508 */ 3509 LLVMValueRef fmask_desc = 3510 LLVMBuildBitCast(ctx->builder, fmask_desc_ptr, 3511 ctx->v8i32, ""); 3512 3513 LLVMValueRef fmask_word1 = 3514 LLVMBuildExtractElement(ctx->builder, fmask_desc, 3515 ctx->i32_1, ""); 3516 3517 LLVMValueRef word1_is_nonzero = 3518 LLVMBuildICmp(ctx->builder, LLVMIntNE, 3519 fmask_word1, ctx->i32_0, ""); 3520 3521 /* Replace the MSAA sample index. */ 3522 sample_index = 3523 LLVMBuildSelect(ctx->builder, word1_is_nonzero, 3524 final_sample, sample_index, ""); 3525 return sample_index; 3526 } 3527 3528 static LLVMValueRef get_image_coords(struct ac_nir_context *ctx, 3529 const nir_intrinsic_instr *instr) 3530 { 3531 const struct glsl_type *type = glsl_without_array(instr->variables[0]->var->type); 3532 3533 LLVMValueRef src0 = get_src(ctx, instr->src[0]); 3534 LLVMValueRef coords[4]; 3535 LLVMValueRef masks[] = { 3536 LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false), 3537 LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false), 3538 }; 3539 LLVMValueRef res; 3540 LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[1]), 0); 3541 3542 int count; 3543 enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); 3544 bool is_array = glsl_sampler_type_is_array(type); 3545 bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS || 3546 dim == GLSL_SAMPLER_DIM_SUBPASS_MS); 3547 bool is_ms = (dim == GLSL_SAMPLER_DIM_MS || 3548 dim == GLSL_SAMPLER_DIM_SUBPASS_MS); 3549 bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D; 3550 count = image_type_to_components_count(dim, is_array); 3551 3552 if (is_ms) { 3553 LLVMValueRef fmask_load_address[3]; 3554 int chan; 3555 3556 fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], ""); 3557 fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], ""); 3558 if (is_array) 3559 fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], ""); 3560 else 3561 fmask_load_address[2] = NULL; 3562 if (add_frag_pos) { 3563 for (chan = 0; chan < 2; ++chan) 3564 fmask_load_address[chan] = 3565 LLVMBuildAdd(ctx->ac.builder, fmask_load_address[chan], 3566 LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan], 3567 ctx->ac.i32, ""), ""); 3568 fmask_load_address[2] = ac_to_integer(&ctx->ac, ctx->abi->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]); 3569 } 3570 sample_index = adjust_sample_index_using_fmask(&ctx->ac, 3571 fmask_load_address[0], 3572 fmask_load_address[1], 3573 fmask_load_address[2], 3574 sample_index, 3575 get_sampler_desc(ctx, instr->variables[0], AC_DESC_FMASK, NULL, true, false)); 3576 } 3577 if (count == 1 && !gfx9_1d) { 3578 if (instr->src[0].ssa->num_components) 3579 res = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], ""); 3580 else 3581 res = src0; 3582 } else { 3583 int chan; 3584 if (is_ms) 3585 count--; 3586 for (chan = 0; chan < count; ++chan) { 3587 coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan); 3588 } 3589 if (add_frag_pos) { 3590 for (chan = 0; chan < 2; ++chan) 3591 coords[chan] = LLVMBuildAdd(ctx->ac.builder, coords[chan], LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan], 3592 ctx->ac.i32, ""), ""); 3593 coords[2] = ac_to_integer(&ctx->ac, ctx->abi->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]); 3594 count++; 3595 } 3596 3597 if (gfx9_1d) { 3598 if (is_array) { 3599 coords[2] = coords[1]; 3600 coords[1] = ctx->ac.i32_0; 3601 } else 3602 coords[1] = ctx->ac.i32_0; 3603 count++; 3604 } 3605 3606 if (is_ms) { 3607 coords[count] = sample_index; 3608 count++; 3609 } 3610 3611 if (count == 3) { 3612 coords[3] = LLVMGetUndef(ctx->ac.i32); 3613 count = 4; 3614 } 3615 res = ac_build_gather_values(&ctx->ac, coords, count); 3616 } 3617 return res; 3618 } 3619 3620 static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx, 3621 const nir_intrinsic_instr *instr, bool write) 3622 { 3623 LLVMValueRef rsrc = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, write); 3624 if (ctx->abi->gfx9_stride_size_workaround) { 3625 LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), ""); 3626 LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), ""); 3627 stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), ""); 3628 3629 LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->ac.builder, 3630 LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""), 3631 elem_count, stride, ""); 3632 3633 rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, new_elem_count, 3634 LLVMConstInt(ctx->ac.i32, 2, 0), ""); 3635 } 3636 return rsrc; 3637 } 3638 3639 static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, 3640 const nir_intrinsic_instr *instr) 3641 { 3642 LLVMValueRef params[7]; 3643 LLVMValueRef res; 3644 char intrinsic_name[64]; 3645 const nir_variable *var = instr->variables[0]->var; 3646 const struct glsl_type *type = var->type; 3647 3648 if(instr->variables[0]->deref.child) 3649 type = instr->variables[0]->deref.child->type; 3650 3651 type = glsl_without_array(type); 3652 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) { 3653 params[0] = get_image_buffer_descriptor(ctx, instr, false); 3654 params[1] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]), 3655 ctx->ac.i32_0, ""); /* vindex */ 3656 params[2] = ctx->ac.i32_0; /* voffset */ 3657 params[3] = ctx->ac.i1false; /* glc */ 3658 params[4] = ctx->ac.i1false; /* slc */ 3659 res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->ac.v4f32, 3660 params, 5, 0); 3661 3662 res = trim_vector(&ctx->ac, res, instr->dest.ssa.num_components); 3663 res = ac_to_integer(&ctx->ac, res); 3664 } else { 3665 bool is_da = glsl_sampler_type_is_array(type) || 3666 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE || 3667 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_3D || 3668 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS || 3669 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS; 3670 LLVMValueRef da = is_da ? ctx->ac.i1true : ctx->ac.i1false; 3671 LLVMValueRef glc = ctx->ac.i1false; 3672 LLVMValueRef slc = ctx->ac.i1false; 3673 3674 params[0] = get_image_coords(ctx, instr); 3675 params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false); 3676 params[2] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */ 3677 if (HAVE_LLVM <= 0x0309) { 3678 params[3] = ctx->ac.i1false; /* r128 */ 3679 params[4] = da; 3680 params[5] = glc; 3681 params[6] = slc; 3682 } else { 3683 LLVMValueRef lwe = ctx->ac.i1false; 3684 params[3] = glc; 3685 params[4] = slc; 3686 params[5] = lwe; 3687 params[6] = da; 3688 } 3689 3690 ac_get_image_intr_name("llvm.amdgcn.image.load", 3691 ctx->ac.v4f32, /* vdata */ 3692 LLVMTypeOf(params[0]), /* coords */ 3693 LLVMTypeOf(params[1]), /* rsrc */ 3694 intrinsic_name, sizeof(intrinsic_name)); 3695 3696 res = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.v4f32, 3697 params, 7, AC_FUNC_ATTR_READONLY); 3698 } 3699 return ac_to_integer(&ctx->ac, res); 3700 } 3701 3702 static void visit_image_store(struct ac_nir_context *ctx, 3703 nir_intrinsic_instr *instr) 3704 { 3705 LLVMValueRef params[8]; 3706 char intrinsic_name[64]; 3707 const nir_variable *var = instr->variables[0]->var; 3708 const struct glsl_type *type = glsl_without_array(var->type); 3709 LLVMValueRef glc = ctx->ac.i1false; 3710 bool force_glc = ctx->ac.chip_class == SI; 3711 if (force_glc) 3712 glc = ctx->ac.i1true; 3713 3714 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) { 3715 LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true); 3716 3717 params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */ 3718 params[1] = rsrc; 3719 params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]), 3720 ctx->ac.i32_0, ""); /* vindex */ 3721 params[3] = ctx->ac.i32_0; /* voffset */ 3722 params[4] = glc; /* glc */ 3723 params[5] = ctx->ac.i1false; /* slc */ 3724 ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt, 3725 params, 6, 0); 3726 } else { 3727 bool is_da = glsl_sampler_type_is_array(type) || 3728 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE || 3729 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_3D; 3730 LLVMValueRef da = is_da ? ctx->ac.i1true : ctx->ac.i1false; 3731 LLVMValueRef slc = ctx->ac.i1false; 3732 3733 params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); 3734 params[1] = get_image_coords(ctx, instr); /* coords */ 3735 params[2] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, true); 3736 params[3] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */ 3737 if (HAVE_LLVM <= 0x0309) { 3738 params[4] = ctx->ac.i1false; /* r128 */ 3739 params[5] = da; 3740 params[6] = glc; 3741 params[7] = slc; 3742 } else { 3743 LLVMValueRef lwe = ctx->ac.i1false; 3744 params[4] = glc; 3745 params[5] = slc; 3746 params[6] = lwe; 3747 params[7] = da; 3748 } 3749 3750 ac_get_image_intr_name("llvm.amdgcn.image.store", 3751 LLVMTypeOf(params[0]), /* vdata */ 3752 LLVMTypeOf(params[1]), /* coords */ 3753 LLVMTypeOf(params[2]), /* rsrc */ 3754 intrinsic_name, sizeof(intrinsic_name)); 3755 3756 ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.voidt, 3757 params, 8, 0); 3758 } 3759 3760 } 3761 3762 static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, 3763 const nir_intrinsic_instr *instr) 3764 { 3765 LLVMValueRef params[7]; 3766 int param_count = 0; 3767 const nir_variable *var = instr->variables[0]->var; 3768 3769 const char *atomic_name; 3770 char intrinsic_name[41]; 3771 const struct glsl_type *type = glsl_without_array(var->type); 3772 MAYBE_UNUSED int length; 3773 3774 bool is_unsigned = glsl_get_sampler_result_type(type) == GLSL_TYPE_UINT; 3775 3776 switch (instr->intrinsic) { 3777 case nir_intrinsic_image_atomic_add: 3778 atomic_name = "add"; 3779 break; 3780 case nir_intrinsic_image_atomic_min: 3781 atomic_name = is_unsigned ? "umin" : "smin"; 3782 break; 3783 case nir_intrinsic_image_atomic_max: 3784 atomic_name = is_unsigned ? "umax" : "smax"; 3785 break; 3786 case nir_intrinsic_image_atomic_and: 3787 atomic_name = "and"; 3788 break; 3789 case nir_intrinsic_image_atomic_or: 3790 atomic_name = "or"; 3791 break; 3792 case nir_intrinsic_image_atomic_xor: 3793 atomic_name = "xor"; 3794 break; 3795 case nir_intrinsic_image_atomic_exchange: 3796 atomic_name = "swap"; 3797 break; 3798 case nir_intrinsic_image_atomic_comp_swap: 3799 atomic_name = "cmpswap"; 3800 break; 3801 default: 3802 abort(); 3803 } 3804 3805 if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap) 3806 params[param_count++] = get_src(ctx, instr->src[3]); 3807 params[param_count++] = get_src(ctx, instr->src[2]); 3808 3809 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) { 3810 params[param_count++] = get_image_buffer_descriptor(ctx, instr, true); 3811 params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]), 3812 ctx->ac.i32_0, ""); /* vindex */ 3813 params[param_count++] = ctx->ac.i32_0; /* voffset */ 3814 params[param_count++] = ctx->ac.i1false; /* slc */ 3815 3816 length = snprintf(intrinsic_name, sizeof(intrinsic_name), 3817 "llvm.amdgcn.buffer.atomic.%s", atomic_name); 3818 } else { 3819 char coords_type[8]; 3820 3821 bool da = glsl_sampler_type_is_array(type) || 3822 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE; 3823 3824 LLVMValueRef coords = params[param_count++] = get_image_coords(ctx, instr); 3825 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, 3826 NULL, true, true); 3827 params[param_count++] = ctx->ac.i1false; /* r128 */ 3828 params[param_count++] = da ? ctx->ac.i1true : ctx->ac.i1false; /* da */ 3829 params[param_count++] = ctx->ac.i1false; /* slc */ 3830 3831 build_int_type_name(LLVMTypeOf(coords), 3832 coords_type, sizeof(coords_type)); 3833 3834 length = snprintf(intrinsic_name, sizeof(intrinsic_name), 3835 "llvm.amdgcn.image.atomic.%s.%s", atomic_name, coords_type); 3836 } 3837 3838 assert(length < sizeof(intrinsic_name)); 3839 return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32, params, param_count, 0); 3840 } 3841 3842 static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, 3843 const nir_intrinsic_instr *instr) 3844 { 3845 LLVMValueRef res; 3846 const nir_variable *var = instr->variables[0]->var; 3847 const struct glsl_type *type = instr->variables[0]->var->type; 3848 bool da = glsl_sampler_type_is_array(var->type) || 3849 glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_CUBE || 3850 glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_3D; 3851 if(instr->variables[0]->deref.child) 3852 type = instr->variables[0]->deref.child->type; 3853 3854 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) 3855 return get_buffer_size(ctx, 3856 get_sampler_desc(ctx, instr->variables[0], 3857 AC_DESC_BUFFER, NULL, true, false), true); 3858 3859 struct ac_image_args args = { 0 }; 3860 3861 args.da = da; 3862 args.dmask = 0xf; 3863 args.resource = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false); 3864 args.opcode = ac_image_get_resinfo; 3865 args.addr = ctx->ac.i32_0; 3866 3867 res = ac_build_image_opcode(&ctx->ac, &args); 3868 3869 LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false); 3870 3871 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE && 3872 glsl_sampler_type_is_array(type)) { 3873 LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false); 3874 LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, ""); 3875 z = LLVMBuildSDiv(ctx->ac.builder, z, six, ""); 3876 res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, ""); 3877 } 3878 if (ctx->ac.chip_class >= GFX9 && 3879 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_1D && 3880 glsl_sampler_type_is_array(type)) { 3881 LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, ""); 3882 res = LLVMBuildInsertElement(ctx->ac.builder, res, layers, 3883 ctx->ac.i32_1, ""); 3884 3885 } 3886 return res; 3887 } 3888 3889 #define NOOP_WAITCNT 0xf7f 3890 #define LGKM_CNT 0x07f 3891 #define VM_CNT 0xf70 3892 3893 static void emit_membar(struct nir_to_llvm_context *ctx, 3894 const nir_intrinsic_instr *instr) 3895 { 3896 unsigned waitcnt = NOOP_WAITCNT; 3897 3898 switch (instr->intrinsic) { 3899 case nir_intrinsic_memory_barrier: 3900 case nir_intrinsic_group_memory_barrier: 3901 waitcnt &= VM_CNT & LGKM_CNT; 3902 break; 3903 case nir_intrinsic_memory_barrier_atomic_counter: 3904 case nir_intrinsic_memory_barrier_buffer: 3905 case nir_intrinsic_memory_barrier_image: 3906 waitcnt &= VM_CNT; 3907 break; 3908 case nir_intrinsic_memory_barrier_shared: 3909 waitcnt &= LGKM_CNT; 3910 break; 3911 default: 3912 break; 3913 } 3914 if (waitcnt != NOOP_WAITCNT) 3915 ac_build_waitcnt(&ctx->ac, waitcnt); 3916 } 3917 3918 static void emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage) 3919 { 3920 /* SI only (thanks to a hw bug workaround): 3921 * The real barrier instruction isnt needed, because an entire patch 3922 * always fits into a single wave. 3923 */ 3924 if (ac->chip_class == SI && stage == MESA_SHADER_TESS_CTRL) { 3925 ac_build_waitcnt(ac, LGKM_CNT & VM_CNT); 3926 return; 3927 } 3928 ac_build_intrinsic(ac, "llvm.amdgcn.s.barrier", 3929 ac->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT); 3930 } 3931 3932 static void emit_discard(struct ac_nir_context *ctx, 3933 const nir_intrinsic_instr *instr) 3934 { 3935 LLVMValueRef cond; 3936 3937 if (instr->intrinsic == nir_intrinsic_discard_if) { 3938 cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, 3939 get_src(ctx, instr->src[0]), 3940 ctx->ac.i32_0, ""); 3941 } else { 3942 assert(instr->intrinsic == nir_intrinsic_discard); 3943 cond = LLVMConstInt(ctx->ac.i1, false, 0); 3944 } 3945 3946 ac_build_kill_if_false(&ctx->ac, cond); 3947 } 3948 3949 static LLVMValueRef 3950 visit_load_helper_invocation(struct ac_nir_context *ctx) 3951 { 3952 LLVMValueRef result = ac_build_intrinsic(&ctx->ac, 3953 "llvm.amdgcn.ps.live", 3954 ctx->ac.i1, NULL, 0, 3955 AC_FUNC_ATTR_READNONE); 3956 result = LLVMBuildNot(ctx->ac.builder, result, ""); 3957 return LLVMBuildSExt(ctx->ac.builder, result, ctx->ac.i32, ""); 3958 } 3959 3960 static LLVMValueRef 3961 visit_load_local_invocation_index(struct nir_to_llvm_context *ctx) 3962 { 3963 LLVMValueRef result; 3964 LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac); 3965 result = LLVMBuildAnd(ctx->builder, ctx->tg_size, 3966 LLVMConstInt(ctx->ac.i32, 0xfc0, false), ""); 3967 3968 return LLVMBuildAdd(ctx->builder, result, thread_id, ""); 3969 } 3970 3971 static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx, 3972 const nir_intrinsic_instr *instr) 3973 { 3974 LLVMValueRef ptr, result; 3975 LLVMValueRef src = get_src(ctx->nir, instr->src[0]); 3976 ptr = build_gep_for_deref(ctx->nir, instr->variables[0]); 3977 3978 if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) { 3979 LLVMValueRef src1 = get_src(ctx->nir, instr->src[1]); 3980 result = LLVMBuildAtomicCmpXchg(ctx->builder, 3981 ptr, src, src1, 3982 LLVMAtomicOrderingSequentiallyConsistent, 3983 LLVMAtomicOrderingSequentiallyConsistent, 3984 false); 3985 } else { 3986 LLVMAtomicRMWBinOp op; 3987 switch (instr->intrinsic) { 3988 case nir_intrinsic_var_atomic_add: 3989 op = LLVMAtomicRMWBinOpAdd; 3990 break; 3991 case nir_intrinsic_var_atomic_umin: 3992 op = LLVMAtomicRMWBinOpUMin; 3993 break; 3994 case nir_intrinsic_var_atomic_umax: 3995 op = LLVMAtomicRMWBinOpUMax; 3996 break; 3997 case nir_intrinsic_var_atomic_imin: 3998 op = LLVMAtomicRMWBinOpMin; 3999 break; 4000 case nir_intrinsic_var_atomic_imax: 4001 op = LLVMAtomicRMWBinOpMax; 4002 break; 4003 case nir_intrinsic_var_atomic_and: 4004 op = LLVMAtomicRMWBinOpAnd; 4005 break; 4006 case nir_intrinsic_var_atomic_or: 4007 op = LLVMAtomicRMWBinOpOr; 4008 break; 4009 case nir_intrinsic_var_atomic_xor: 4010 op = LLVMAtomicRMWBinOpXor; 4011 break; 4012 case nir_intrinsic_var_atomic_exchange: 4013 op = LLVMAtomicRMWBinOpXchg; 4014 break; 4015 default: 4016 return NULL; 4017 } 4018 4019 result = LLVMBuildAtomicRMW(ctx->builder, op, ptr, ac_to_integer(&ctx->ac, src), 4020 LLVMAtomicOrderingSequentiallyConsistent, 4021 false); 4022 } 4023 return result; 4024 } 4025 4026 #define INTERP_CENTER 0 4027 #define INTERP_CENTROID 1 4028 #define INTERP_SAMPLE 2 4029 4030 static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx, 4031 enum glsl_interp_mode interp, unsigned location) 4032 { 4033 switch (interp) { 4034 case INTERP_MODE_FLAT: 4035 default: 4036 return NULL; 4037 case INTERP_MODE_SMOOTH: 4038 case INTERP_MODE_NONE: 4039 if (location == INTERP_CENTER) 4040 return ctx->persp_center; 4041 else if (location == INTERP_CENTROID) 4042 return ctx->persp_centroid; 4043 else if (location == INTERP_SAMPLE) 4044 return ctx->persp_sample; 4045 break; 4046 case INTERP_MODE_NOPERSPECTIVE: 4047 if (location == INTERP_CENTER) 4048 return ctx->linear_center; 4049 else if (location == INTERP_CENTROID) 4050 return ctx->linear_centroid; 4051 else if (location == INTERP_SAMPLE) 4052 return ctx->linear_sample; 4053 break; 4054 } 4055 return NULL; 4056 } 4057 4058 static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx, 4059 LLVMValueRef sample_id) 4060 { 4061 LLVMValueRef result; 4062 LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_PS_SAMPLE_POSITIONS, false)); 4063 4064 ptr = LLVMBuildBitCast(ctx->builder, ptr, 4065 const_array(ctx->ac.v2f32, 64), ""); 4066 4067 sample_id = LLVMBuildAdd(ctx->builder, sample_id, ctx->sample_pos_offset, ""); 4068 result = ac_build_load_invariant(&ctx->ac, ptr, sample_id); 4069 4070 return result; 4071 } 4072 4073 static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx) 4074 { 4075 LLVMValueRef values[2]; 4076 4077 values[0] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[0]); 4078 values[1] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[1]); 4079 return ac_build_gather_values(&ctx->ac, values, 2); 4080 } 4081 4082 static LLVMValueRef load_sample_mask_in(struct ac_nir_context *ctx) 4083 { 4084 uint8_t log2_ps_iter_samples = ctx->nctx->shader_info->info.ps.force_persample ? ctx->nctx->options->key.fs.log2_num_samples : ctx->nctx->options->key.fs.log2_ps_iter_samples; 4085 4086 /* The bit pattern matches that used by fixed function fragment 4087 * processing. */ 4088 static const uint16_t ps_iter_masks[] = { 4089 0xffff, /* not used */ 4090 0x5555, 4091 0x1111, 4092 0x0101, 4093 0x0001, 4094 }; 4095 assert(log2_ps_iter_samples < ARRAY_SIZE(ps_iter_masks)); 4096 4097 uint32_t ps_iter_mask = ps_iter_masks[log2_ps_iter_samples]; 4098 4099 LLVMValueRef result, sample_id; 4100 sample_id = unpack_param(&ctx->ac, ctx->abi->ancillary, 8, 4); 4101 sample_id = LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, ps_iter_mask, false), sample_id, ""); 4102 result = LLVMBuildAnd(ctx->ac.builder, sample_id, ctx->abi->sample_coverage, ""); 4103 return result; 4104 } 4105 4106 static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx, 4107 const nir_intrinsic_instr *instr) 4108 { 4109 LLVMValueRef result[4]; 4110 LLVMValueRef interp_param, attr_number; 4111 unsigned location; 4112 unsigned chan; 4113 LLVMValueRef src_c0 = NULL; 4114 LLVMValueRef src_c1 = NULL; 4115 LLVMValueRef src0 = NULL; 4116 int input_index = instr->variables[0]->var->data.location - VARYING_SLOT_VAR0; 4117 switch (instr->intrinsic) { 4118 case nir_intrinsic_interp_var_at_centroid: 4119 location = INTERP_CENTROID; 4120 break; 4121 case nir_intrinsic_interp_var_at_sample: 4122 case nir_intrinsic_interp_var_at_offset: 4123 location = INTERP_CENTER; 4124 src0 = get_src(ctx->nir, instr->src[0]); 4125 break; 4126 default: 4127 break; 4128 } 4129 4130 if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) { 4131 src_c0 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->ac.i32_0, "")); 4132 src_c1 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->ac.i32_1, "")); 4133 } else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) { 4134 LLVMValueRef sample_position; 4135 LLVMValueRef halfval = LLVMConstReal(ctx->ac.f32, 0.5f); 4136 4137 /* fetch sample ID */ 4138 sample_position = load_sample_position(ctx, src0); 4139 4140 src_c0 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->ac.i32_0, ""); 4141 src_c0 = LLVMBuildFSub(ctx->builder, src_c0, halfval, ""); 4142 src_c1 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->ac.i32_1, ""); 4143 src_c1 = LLVMBuildFSub(ctx->builder, src_c1, halfval, ""); 4144 } 4145 interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location); 4146 attr_number = LLVMConstInt(ctx->ac.i32, input_index, false); 4147 4148 if (location == INTERP_CENTER) { 4149 LLVMValueRef ij_out[2]; 4150 LLVMValueRef ddxy_out = emit_ddxy_interp(ctx->nir, interp_param); 4151 4152 /* 4153 * take the I then J parameters, and the DDX/Y for it, and 4154 * calculate the IJ inputs for the interpolator. 4155 * temp1 = ddx * offset/sample.x + I; 4156 * interp_param.I = ddy * offset/sample.y + temp1; 4157 * temp1 = ddx * offset/sample.x + J; 4158 * interp_param.J = ddy * offset/sample.y + temp1; 4159 */ 4160 for (unsigned i = 0; i < 2; i++) { 4161 LLVMValueRef ix_ll = LLVMConstInt(ctx->ac.i32, i, false); 4162 LLVMValueRef iy_ll = LLVMConstInt(ctx->ac.i32, i + 2, false); 4163 LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->builder, 4164 ddxy_out, ix_ll, ""); 4165 LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->builder, 4166 ddxy_out, iy_ll, ""); 4167 LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->builder, 4168 interp_param, ix_ll, ""); 4169 LLVMValueRef temp1, temp2; 4170 4171 interp_el = LLVMBuildBitCast(ctx->builder, interp_el, 4172 ctx->ac.f32, ""); 4173 4174 temp1 = LLVMBuildFMul(ctx->builder, ddx_el, src_c0, ""); 4175 temp1 = LLVMBuildFAdd(ctx->builder, temp1, interp_el, ""); 4176 4177 temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, ""); 4178 temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, ""); 4179 4180 ij_out[i] = LLVMBuildBitCast(ctx->builder, 4181 temp2, ctx->ac.i32, ""); 4182 } 4183 interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2); 4184 4185 } 4186 4187 for (chan = 0; chan < 4; chan++) { 4188 LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); 4189 4190 if (interp_param) { 4191 interp_param = LLVMBuildBitCast(ctx->builder, 4192 interp_param, ctx->ac.v2f32, ""); 4193 LLVMValueRef i = LLVMBuildExtractElement( 4194 ctx->builder, interp_param, ctx->ac.i32_0, ""); 4195 LLVMValueRef j = LLVMBuildExtractElement( 4196 ctx->builder, interp_param, ctx->ac.i32_1, ""); 4197 4198 result[chan] = ac_build_fs_interp(&ctx->ac, 4199 llvm_chan, attr_number, 4200 ctx->prim_mask, i, j); 4201 } else { 4202 result[chan] = ac_build_fs_interp_mov(&ctx->ac, 4203 LLVMConstInt(ctx->ac.i32, 2, false), 4204 llvm_chan, attr_number, 4205 ctx->prim_mask); 4206 } 4207 } 4208 return ac_build_varying_gather_values(&ctx->ac, result, instr->num_components, 4209 instr->variables[0]->var->data.location_frac); 4210 } 4211 4212 static void 4213 visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs) 4214 { 4215 LLVMValueRef gs_next_vertex; 4216 LLVMValueRef can_emit; 4217 int idx; 4218 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); 4219 4220 assert(stream == 0); 4221 4222 /* Write vertex attribute values to GSVS ring */ 4223 gs_next_vertex = LLVMBuildLoad(ctx->builder, 4224 ctx->gs_next_vertex, 4225 ""); 4226 4227 /* If this thread has already emitted the declared maximum number of 4228 * vertices, kill it: excessive vertex emissions are not supposed to 4229 * have any effect, and GS threads have no externally observable 4230 * effects other than emitting vertices. 4231 */ 4232 can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex, 4233 LLVMConstInt(ctx->ac.i32, ctx->gs_max_out_vertices, false), ""); 4234 ac_build_kill_if_false(&ctx->ac, can_emit); 4235 4236 /* loop num outputs */ 4237 idx = 0; 4238 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { 4239 LLVMValueRef *out_ptr = &addrs[i * 4]; 4240 int length = 4; 4241 int slot = idx; 4242 int slot_inc = 1; 4243 4244 if (!(ctx->output_mask & (1ull << i))) 4245 continue; 4246 4247 if (i == VARYING_SLOT_CLIP_DIST0) { 4248 /* pack clip and cull into a single set of slots */ 4249 length = ctx->num_output_clips + ctx->num_output_culls; 4250 if (length > 4) 4251 slot_inc = 2; 4252 } 4253 for (unsigned j = 0; j < length; j++) { 4254 LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, 4255 out_ptr[j], ""); 4256 LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, (slot * 4 + j) * ctx->gs_max_out_vertices, false); 4257 voffset = LLVMBuildAdd(ctx->builder, voffset, gs_next_vertex, ""); 4258 voffset = LLVMBuildMul(ctx->builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), ""); 4259 4260 out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->ac.i32, ""); 4261 4262 ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring, 4263 out_val, 1, 4264 voffset, ctx->gs2vs_offset, 0, 4265 1, 1, true, true); 4266 } 4267 idx += slot_inc; 4268 } 4269 4270 gs_next_vertex = LLVMBuildAdd(ctx->builder, gs_next_vertex, 4271 ctx->ac.i32_1, ""); 4272 LLVMBuildStore(ctx->builder, gs_next_vertex, ctx->gs_next_vertex); 4273 4274 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (0 << 8), ctx->gs_wave_id); 4275 } 4276 4277 static void 4278 visit_end_primitive(struct ac_shader_abi *abi, unsigned stream) 4279 { 4280 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); 4281 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8), ctx->gs_wave_id); 4282 } 4283 4284 static LLVMValueRef 4285 load_tess_coord(struct ac_shader_abi *abi, LLVMTypeRef type, 4286 unsigned num_components) 4287 { 4288 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); 4289 4290 LLVMValueRef coord[4] = { 4291 ctx->tes_u, 4292 ctx->tes_v, 4293 ctx->ac.f32_0, 4294 ctx->ac.f32_0, 4295 }; 4296 4297 if (ctx->tes_primitive_mode == GL_TRIANGLES) 4298 coord[2] = LLVMBuildFSub(ctx->builder, ctx->ac.f32_1, 4299 LLVMBuildFAdd(ctx->builder, coord[0], coord[1], ""), ""); 4300 4301 LLVMValueRef result = ac_build_gather_values(&ctx->ac, coord, num_components); 4302 return LLVMBuildBitCast(ctx->builder, result, type, ""); 4303 } 4304 4305 static LLVMValueRef 4306 load_patch_vertices_in(struct ac_shader_abi *abi) 4307 { 4308 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); 4309 return LLVMConstInt(ctx->ac.i32, ctx->options->key.tcs.input_vertices, false); 4310 } 4311 4312 static void visit_intrinsic(struct ac_nir_context *ctx, 4313 nir_intrinsic_instr *instr) 4314 { 4315 LLVMValueRef result = NULL; 4316 4317 switch (instr->intrinsic) { 4318 case nir_intrinsic_ballot: 4319 result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0])); 4320 break; 4321 case nir_intrinsic_read_invocation: 4322 case nir_intrinsic_read_first_invocation: { 4323 LLVMValueRef args[2]; 4324 4325 /* Value */ 4326 args[0] = get_src(ctx, instr->src[0]); 4327 4328 unsigned num_args; 4329 const char *intr_name; 4330 if (instr->intrinsic == nir_intrinsic_read_invocation) { 4331 num_args = 2; 4332 intr_name = "llvm.amdgcn.readlane"; 4333 4334 /* Invocation */ 4335 args[1] = get_src(ctx, instr->src[1]); 4336 } else { 4337 num_args = 1; 4338 intr_name = "llvm.amdgcn.readfirstlane"; 4339 } 4340 4341 /* We currently have no other way to prevent LLVM from lifting the icmp 4342 * calls to a dominating basic block. 4343 */ 4344 ac_build_optimization_barrier(&ctx->ac, &args[0]); 4345 4346 result = ac_build_intrinsic(&ctx->ac, intr_name, 4347 ctx->ac.i32, args, num_args, 4348 AC_FUNC_ATTR_READNONE | 4349 AC_FUNC_ATTR_CONVERGENT); 4350 break; 4351 } 4352 case nir_intrinsic_load_subgroup_invocation: 4353 result = ac_get_thread_id(&ctx->ac); 4354 break; 4355 case nir_intrinsic_load_work_group_id: { 4356 LLVMValueRef values[3]; 4357 4358 for (int i = 0; i < 3; i++) { 4359 values[i] = ctx->nctx->workgroup_ids[i] ? 4360 ctx->nctx->workgroup_ids[i] : ctx->ac.i32_0; 4361 } 4362 4363 result = ac_build_gather_values(&ctx->ac, values, 3); 4364 break; 4365 } 4366 case nir_intrinsic_load_base_vertex: { 4367 result = ctx->abi->base_vertex; 4368 break; 4369 } 4370 case nir_intrinsic_load_vertex_id_zero_base: { 4371 result = ctx->abi->vertex_id; 4372 break; 4373 } 4374 case nir_intrinsic_load_local_invocation_id: { 4375 result = ctx->nctx->local_invocation_ids; 4376 break; 4377 } 4378 case nir_intrinsic_load_base_instance: 4379 result = ctx->abi->start_instance; 4380 break; 4381 case nir_intrinsic_load_draw_id: 4382 result = ctx->abi->draw_id; 4383 break; 4384 case nir_intrinsic_load_view_index: 4385 result = ctx->nctx->view_index ? ctx->nctx->view_index : ctx->ac.i32_0; 4386 break; 4387 case nir_intrinsic_load_invocation_id: 4388 if (ctx->stage == MESA_SHADER_TESS_CTRL) 4389 result = unpack_param(&ctx->ac, ctx->abi->tcs_rel_ids, 8, 5); 4390 else 4391 result = ctx->abi->gs_invocation_id; 4392 break; 4393 case nir_intrinsic_load_primitive_id: 4394 if (ctx->stage == MESA_SHADER_GEOMETRY) { 4395 result = ctx->abi->gs_prim_id; 4396 } else if (ctx->stage == MESA_SHADER_TESS_CTRL) { 4397 result = ctx->abi->tcs_patch_id; 4398 } else if (ctx->stage == MESA_SHADER_TESS_EVAL) { 4399 result = ctx->abi->tes_patch_id; 4400 } else 4401 fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage); 4402 break; 4403 case nir_intrinsic_load_sample_id: 4404 result = unpack_param(&ctx->ac, ctx->abi->ancillary, 8, 4); 4405 break; 4406 case nir_intrinsic_load_sample_pos: 4407 result = load_sample_pos(ctx); 4408 break; 4409 case nir_intrinsic_load_sample_mask_in: 4410 if (ctx->nctx) 4411 result = load_sample_mask_in(ctx); 4412 else 4413 result = ctx->abi->sample_coverage; 4414 break; 4415 case nir_intrinsic_load_frag_coord: { 4416 LLVMValueRef values[4] = { 4417 ctx->abi->frag_pos[0], 4418 ctx->abi->frag_pos[1], 4419 ctx->abi->frag_pos[2], 4420 ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ctx->abi->frag_pos[3]) 4421 }; 4422 result = ac_build_gather_values(&ctx->ac, values, 4); 4423 break; 4424 } 4425 case nir_intrinsic_load_front_face: 4426 result = ctx->abi->front_face; 4427 break; 4428 case nir_intrinsic_load_helper_invocation: 4429 result = visit_load_helper_invocation(ctx); 4430 break; 4431 case nir_intrinsic_load_instance_id: 4432 result = ctx->abi->instance_id; 4433 break; 4434 case nir_intrinsic_load_num_work_groups: 4435 result = ctx->nctx->num_work_groups; 4436 break; 4437 case nir_intrinsic_load_local_invocation_index: 4438 result = visit_load_local_invocation_index(ctx->nctx); 4439 break; 4440 case nir_intrinsic_load_push_constant: 4441 result = visit_load_push_constant(ctx->nctx, instr); 4442 break; 4443 case nir_intrinsic_vulkan_resource_index: 4444 result = visit_vulkan_resource_index(ctx->nctx, instr); 4445 break; 4446 case nir_intrinsic_vulkan_resource_reindex: 4447 result = visit_vulkan_resource_reindex(ctx->nctx, instr); 4448 break; 4449 case nir_intrinsic_store_ssbo: 4450 visit_store_ssbo(ctx, instr); 4451 break; 4452 case nir_intrinsic_load_ssbo: 4453 result = visit_load_buffer(ctx, instr); 4454 break; 4455 case nir_intrinsic_ssbo_atomic_add: 4456 case nir_intrinsic_ssbo_atomic_imin: 4457 case nir_intrinsic_ssbo_atomic_umin: 4458 case nir_intrinsic_ssbo_atomic_imax: 4459 case nir_intrinsic_ssbo_atomic_umax: 4460 case nir_intrinsic_ssbo_atomic_and: 4461 case nir_intrinsic_ssbo_atomic_or: 4462 case nir_intrinsic_ssbo_atomic_xor: 4463 case nir_intrinsic_ssbo_atomic_exchange: 4464 case nir_intrinsic_ssbo_atomic_comp_swap: 4465 result = visit_atomic_ssbo(ctx, instr); 4466 break; 4467 case nir_intrinsic_load_ubo: 4468 result = visit_load_ubo_buffer(ctx, instr); 4469 break; 4470 case nir_intrinsic_get_buffer_size: 4471 result = visit_get_buffer_size(ctx, instr); 4472 break; 4473 case nir_intrinsic_load_var: 4474 result = visit_load_var(ctx, instr); 4475 break; 4476 case nir_intrinsic_store_var: 4477 visit_store_var(ctx, instr); 4478 break; 4479 case nir_intrinsic_image_load: 4480 result = visit_image_load(ctx, instr); 4481 break; 4482 case nir_intrinsic_image_store: 4483 visit_image_store(ctx, instr); 4484 break; 4485 case nir_intrinsic_image_atomic_add: 4486 case nir_intrinsic_image_atomic_min: 4487 case nir_intrinsic_image_atomic_max: 4488 case nir_intrinsic_image_atomic_and: 4489 case nir_intrinsic_image_atomic_or: 4490 case nir_intrinsic_image_atomic_xor: 4491 case nir_intrinsic_image_atomic_exchange: 4492 case nir_intrinsic_image_atomic_comp_swap: 4493 result = visit_image_atomic(ctx, instr); 4494 break; 4495 case nir_intrinsic_image_size: 4496 result = visit_image_size(ctx, instr); 4497 break; 4498 case nir_intrinsic_discard: 4499 case nir_intrinsic_discard_if: 4500 emit_discard(ctx, instr); 4501 break; 4502 case nir_intrinsic_memory_barrier: 4503 case nir_intrinsic_group_memory_barrier: 4504 case nir_intrinsic_memory_barrier_atomic_counter: 4505 case nir_intrinsic_memory_barrier_buffer: 4506 case nir_intrinsic_memory_barrier_image: 4507 case nir_intrinsic_memory_barrier_shared: 4508 emit_membar(ctx->nctx, instr); 4509 break; 4510 case nir_intrinsic_barrier: 4511 emit_barrier(&ctx->ac, ctx->stage); 4512 break; 4513 case nir_intrinsic_var_atomic_add: 4514 case nir_intrinsic_var_atomic_imin: 4515 case nir_intrinsic_var_atomic_umin: 4516 case nir_intrinsic_var_atomic_imax: 4517 case nir_intrinsic_var_atomic_umax: 4518 case nir_intrinsic_var_atomic_and: 4519 case nir_intrinsic_var_atomic_or: 4520 case nir_intrinsic_var_atomic_xor: 4521 case nir_intrinsic_var_atomic_exchange: 4522 case nir_intrinsic_var_atomic_comp_swap: 4523 result = visit_var_atomic(ctx->nctx, instr); 4524 break; 4525 case nir_intrinsic_interp_var_at_centroid: 4526 case nir_intrinsic_interp_var_at_sample: 4527 case nir_intrinsic_interp_var_at_offset: 4528 result = visit_interp(ctx->nctx, instr); 4529 break; 4530 case nir_intrinsic_emit_vertex: 4531 ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->outputs); 4532 break; 4533 case nir_intrinsic_end_primitive: 4534 ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr)); 4535 break; 4536 case nir_intrinsic_load_tess_coord: { 4537 LLVMTypeRef type = ctx->nctx ? 4538 get_def_type(ctx->nctx->nir, &instr->dest.ssa) : 4539 NULL; 4540 result = ctx->abi->load_tess_coord(ctx->abi, type, instr->num_components); 4541 break; 4542 } 4543 case nir_intrinsic_load_tess_level_outer: 4544 result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER); 4545 break; 4546 case nir_intrinsic_load_tess_level_inner: 4547 result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER); 4548 break; 4549 case nir_intrinsic_load_patch_vertices_in: 4550 result = ctx->abi->load_patch_vertices_in(ctx->abi); 4551 break; 4552 case nir_intrinsic_vote_all: { 4553 LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0])); 4554 result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, ""); 4555 break; 4556 } 4557 case nir_intrinsic_vote_any: { 4558 LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0])); 4559 result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, ""); 4560 break; 4561 } 4562 case nir_intrinsic_vote_eq: { 4563 LLVMValueRef tmp = ac_build_vote_eq(&ctx->ac, get_src(ctx, instr->src[0])); 4564 result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, ""); 4565 break; 4566 } 4567 default: 4568 fprintf(stderr, "Unknown intrinsic: "); 4569 nir_print_instr(&instr->instr, stderr); 4570 fprintf(stderr, "\n"); 4571 break; 4572 } 4573 if (result) { 4574 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result); 4575 } 4576 } 4577 4578 static LLVMValueRef radv_load_ssbo(struct ac_shader_abi *abi, 4579 LLVMValueRef buffer_ptr, bool write) 4580 { 4581 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); 4582 4583 if (write && ctx->stage == MESA_SHADER_FRAGMENT) 4584 ctx->shader_info->fs.writes_memory = true; 4585 4586 return LLVMBuildLoad(ctx->builder, buffer_ptr, ""); 4587 } 4588 4589 static LLVMValueRef radv_load_ubo(struct ac_shader_abi *abi, LLVMValueRef buffer_ptr) 4590 { 4591 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); 4592 LLVMValueRef result; 4593 4594 LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md); 4595 4596 result = LLVMBuildLoad(ctx->builder, buffer_ptr, ""); 4597 LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md); 4598 4599 return result; 4600 } 4601 4602 static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi, 4603 unsigned descriptor_set, 4604 unsigned base_index, 4605 unsigned constant_index, 4606 LLVMValueRef index, 4607 enum ac_descriptor_type desc_type, 4608 bool image, bool write) 4609 { 4610 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); 4611 LLVMValueRef list = ctx->descriptor_sets[descriptor_set]; 4612 struct radv_descriptor_set_layout *layout = ctx->options->layout->set[descriptor_set].layout; 4613 struct radv_descriptor_set_binding_layout *binding = layout->binding + base_index; 4614 unsigned offset = binding->offset; 4615 unsigned stride = binding->size; 4616 unsigned type_size; 4617 LLVMBuilderRef builder = ctx->builder; 4618 LLVMTypeRef type; 4619 4620 assert(base_index < layout->binding_count); 4621 4622 if (write && ctx->stage == MESA_SHADER_FRAGMENT) 4623 ctx->shader_info->fs.writes_memory = true; 4624 4625 switch (desc_type) { 4626 case AC_DESC_IMAGE: 4627 type = ctx->ac.v8i32; 4628 type_size = 32; 4629 break; 4630 case AC_DESC_FMASK: 4631 type = ctx->ac.v8i32; 4632 offset += 32; 4633 type_size = 32; 4634 break; 4635 case AC_DESC_SAMPLER: 4636 type = ctx->ac.v4i32; 4637 if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) 4638 offset += 64; 4639 4640 type_size = 16; 4641 break; 4642 case AC_DESC_BUFFER: 4643 type = ctx->ac.v4i32; 4644 type_size = 16; 4645 break; 4646 default: 4647 unreachable("invalid desc_type\n"); 4648 } 4649 4650 offset += constant_index * stride; 4651 4652 if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset && 4653 (!index || binding->immutable_samplers_equal)) { 4654 if (binding->immutable_samplers_equal) 4655 constant_index = 0; 4656 4657 const uint32_t *samplers = radv_immutable_samplers(layout, binding); 4658 4659 LLVMValueRef constants[] = { 4660 LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 0], 0), 4661 LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 1], 0), 4662 LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 2], 0), 4663 LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 3], 0), 4664 }; 4665 return ac_build_gather_values(&ctx->ac, constants, 4); 4666 } 4667 4668 assert(stride % type_size == 0); 4669 4670 if (!index) 4671 index = ctx->ac.i32_0; 4672 4673 index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), ""); 4674 4675 list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, offset, 0)); 4676 list = LLVMBuildPointerCast(builder, list, const_array(type, 0), ""); 4677 4678 return ac_build_load_to_sgpr(&ctx->ac, list, index); 4679 } 4680 4681 static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, 4682 const nir_deref_var *deref, 4683 enum ac_descriptor_type desc_type, 4684 const nir_tex_instr *tex_instr, 4685 bool image, bool write) 4686 { 4687 LLVMValueRef index = NULL; 4688 unsigned constant_index = 0; 4689 unsigned descriptor_set; 4690 unsigned base_index; 4691 4692 if (!deref) { 4693 assert(tex_instr && !image); 4694 descriptor_set = 0; 4695 base_index = tex_instr->sampler_index; 4696 } else { 4697 const nir_deref *tail = &deref->deref; 4698 while (tail->child) { 4699 const nir_deref_array *child = nir_deref_as_array(tail->child); 4700 unsigned array_size = glsl_get_aoa_size(tail->child->type); 4701 4702 if (!array_size) 4703 array_size = 1; 4704 4705 assert(child->deref_array_type != nir_deref_array_type_wildcard); 4706 4707 if (child->deref_array_type == nir_deref_array_type_indirect) { 4708 LLVMValueRef indirect = get_src(ctx, child->indirect); 4709 4710 indirect = LLVMBuildMul(ctx->ac.builder, indirect, 4711 LLVMConstInt(ctx->ac.i32, array_size, false), ""); 4712 4713 if (!index) 4714 index = indirect; 4715 else 4716 index = LLVMBuildAdd(ctx->ac.builder, index, indirect, ""); 4717 } 4718 4719 constant_index += child->base_offset * array_size; 4720 4721 tail = &child->deref; 4722 } 4723 descriptor_set = deref->var->data.descriptor_set; 4724 base_index = deref->var->data.binding; 4725 } 4726 4727 return ctx->abi->load_sampler_desc(ctx->abi, 4728 descriptor_set, 4729 base_index, 4730 constant_index, index, 4731 desc_type, image, write); 4732 } 4733 4734 static void set_tex_fetch_args(struct ac_llvm_context *ctx, 4735 struct ac_image_args *args, 4736 const nir_tex_instr *instr, 4737 nir_texop op, 4738 LLVMValueRef res_ptr, LLVMValueRef samp_ptr, 4739 LLVMValueRef *param, unsigned count, 4740 unsigned dmask) 4741 { 4742 unsigned is_rect = 0; 4743 bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE; 4744 4745 if (op == nir_texop_lod) 4746 da = false; 4747 /* Pad to power of two vector */ 4748 while (count < util_next_power_of_two(count)) 4749 param[count++] = LLVMGetUndef(ctx->i32); 4750 4751 if (count > 1) 4752 args->addr = ac_build_gather_values(ctx, param, count); 4753 else 4754 args->addr = param[0]; 4755 4756 args->resource = res_ptr; 4757 args->sampler = samp_ptr; 4758 4759 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) { 4760 args->addr = param[0]; 4761 return; 4762 } 4763 4764 args->dmask = dmask; 4765 args->unorm = is_rect; 4766 args->da = da; 4767 } 4768 4769 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL. 4770 * 4771 * SI-CI: 4772 * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic 4773 * filtering manually. The driver sets img7 to a mask clearing 4774 * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do: 4775 * s_and_b32 samp0, samp0, img7 4776 * 4777 * VI: 4778 * The ANISO_OVERRIDE sampler field enables this fix in TA. 4779 */ 4780 static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx, 4781 LLVMValueRef res, LLVMValueRef samp) 4782 { 4783 LLVMBuilderRef builder = ctx->ac.builder; 4784 LLVMValueRef img7, samp0; 4785 4786 if (ctx->ac.chip_class >= VI) 4787 return samp; 4788 4789 img7 = LLVMBuildExtractElement(builder, res, 4790 LLVMConstInt(ctx->ac.i32, 7, 0), ""); 4791 samp0 = LLVMBuildExtractElement(builder, samp, 4792 LLVMConstInt(ctx->ac.i32, 0, 0), ""); 4793 samp0 = LLVMBuildAnd(builder, samp0, img7, ""); 4794 return LLVMBuildInsertElement(builder, samp, samp0, 4795 LLVMConstInt(ctx->ac.i32, 0, 0), ""); 4796 } 4797 4798 static void tex_fetch_ptrs(struct ac_nir_context *ctx, 4799 nir_tex_instr *instr, 4800 LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, 4801 LLVMValueRef *fmask_ptr) 4802 { 4803 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) 4804 *res_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_BUFFER, instr, false, false); 4805 else 4806 *res_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_IMAGE, instr, false, false); 4807 if (samp_ptr) { 4808 if (instr->sampler) 4809 *samp_ptr = get_sampler_desc(ctx, instr->sampler, AC_DESC_SAMPLER, instr, false, false); 4810 else 4811 *samp_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_SAMPLER, instr, false, false); 4812 if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT) 4813 *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr); 4814 } 4815 if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms || 4816 instr->op == nir_texop_samples_identical)) 4817 *fmask_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_FMASK, instr, false, false); 4818 } 4819 4820 static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx, 4821 LLVMValueRef coord) 4822 { 4823 coord = ac_to_float(ctx, coord); 4824 coord = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &coord, 1, 0); 4825 coord = ac_to_integer(ctx, coord); 4826 return coord; 4827 } 4828 4829 static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) 4830 { 4831 LLVMValueRef result = NULL; 4832 struct ac_image_args args = { 0 }; 4833 unsigned dmask = 0xf; 4834 LLVMValueRef address[16]; 4835 LLVMValueRef coords[5]; 4836 LLVMValueRef coord = NULL, lod = NULL, comparator = NULL; 4837 LLVMValueRef bias = NULL, offsets = NULL; 4838 LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL; 4839 LLVMValueRef ddx = NULL, ddy = NULL; 4840 LLVMValueRef derivs[6]; 4841 unsigned chan, count = 0; 4842 unsigned const_src = 0, num_deriv_comp = 0; 4843 bool lod_is_zero = false; 4844 4845 tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr); 4846 4847 for (unsigned i = 0; i < instr->num_srcs; i++) { 4848 switch (instr->src[i].src_type) { 4849 case nir_tex_src_coord: 4850 coord = get_src(ctx, instr->src[i].src); 4851 break; 4852 case nir_tex_src_projector: 4853 break; 4854 case nir_tex_src_comparator: 4855 comparator = get_src(ctx, instr->src[i].src); 4856 break; 4857 case nir_tex_src_offset: 4858 offsets = get_src(ctx, instr->src[i].src); 4859 const_src = i; 4860 break; 4861 case nir_tex_src_bias: 4862 bias = get_src(ctx, instr->src[i].src); 4863 break; 4864 case nir_tex_src_lod: { 4865 nir_const_value *val = nir_src_as_const_value(instr->src[i].src); 4866 4867 if (val && val->i32[0] == 0) 4868 lod_is_zero = true; 4869 lod = get_src(ctx, instr->src[i].src); 4870 break; 4871 } 4872 case nir_tex_src_ms_index: 4873 sample_index = get_src(ctx, instr->src[i].src); 4874 break; 4875 case nir_tex_src_ms_mcs: 4876 break; 4877 case nir_tex_src_ddx: 4878 ddx = get_src(ctx, instr->src[i].src); 4879 num_deriv_comp = instr->src[i].src.ssa->num_components; 4880 break; 4881 case nir_tex_src_ddy: 4882 ddy = get_src(ctx, instr->src[i].src); 4883 break; 4884 case nir_tex_src_texture_offset: 4885 case nir_tex_src_sampler_offset: 4886 case nir_tex_src_plane: 4887 default: 4888 break; 4889 } 4890 } 4891 4892 if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { 4893 result = get_buffer_size(ctx, res_ptr, true); 4894 goto write_result; 4895 } 4896 4897 if (instr->op == nir_texop_texture_samples) { 4898 LLVMValueRef res, samples, is_msaa; 4899 res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->ac.v8i32, ""); 4900 samples = LLVMBuildExtractElement(ctx->ac.builder, res, 4901 LLVMConstInt(ctx->ac.i32, 3, false), ""); 4902 is_msaa = LLVMBuildLShr(ctx->ac.builder, samples, 4903 LLVMConstInt(ctx->ac.i32, 28, false), ""); 4904 is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa, 4905 LLVMConstInt(ctx->ac.i32, 0xe, false), ""); 4906 is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa, 4907 LLVMConstInt(ctx->ac.i32, 0xe, false), ""); 4908 4909 samples = LLVMBuildLShr(ctx->ac.builder, samples, 4910 LLVMConstInt(ctx->ac.i32, 16, false), ""); 4911 samples = LLVMBuildAnd(ctx->ac.builder, samples, 4912 LLVMConstInt(ctx->ac.i32, 0xf, false), ""); 4913 samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1, 4914 samples, ""); 4915 samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples, 4916 ctx->ac.i32_1, ""); 4917 result = samples; 4918 goto write_result; 4919 } 4920 4921 if (coord) 4922 for (chan = 0; chan < instr->coord_components; chan++) 4923 coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan); 4924 4925 if (offsets && instr->op != nir_texop_txf) { 4926 LLVMValueRef offset[3], pack; 4927 for (chan = 0; chan < 3; ++chan) 4928 offset[chan] = ctx->ac.i32_0; 4929 4930 args.offset = true; 4931 for (chan = 0; chan < ac_get_llvm_num_components(offsets); chan++) { 4932 offset[chan] = ac_llvm_extract_elem(&ctx->ac, offsets, chan); 4933 offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan], 4934 LLVMConstInt(ctx->ac.i32, 0x3f, false), ""); 4935 if (chan) 4936 offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan], 4937 LLVMConstInt(ctx->ac.i32, chan * 8, false), ""); 4938 } 4939 pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], ""); 4940 pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], ""); 4941 address[count++] = pack; 4942 4943 } 4944 /* pack LOD bias value */ 4945 if (instr->op == nir_texop_txb && bias) { 4946 address[count++] = bias; 4947 } 4948 4949 /* Pack depth comparison value */ 4950 if (instr->is_shadow && comparator) { 4951 LLVMValueRef z = ac_to_float(&ctx->ac, 4952 ac_llvm_extract_elem(&ctx->ac, comparator, 0)); 4953 4954 /* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT, 4955 * so the depth comparison value isn't clamped for Z16 and 4956 * Z24 anymore. Do it manually here. 4957 * 4958 * It's unnecessary if the original texture format was 4959 * Z32_FLOAT, but we don't know that here. 4960 */ 4961 if (ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference) 4962 z = ac_build_clamp(&ctx->ac, z); 4963 4964 address[count++] = z; 4965 } 4966 4967 /* pack derivatives */ 4968 if (ddx || ddy) { 4969 int num_src_deriv_channels, num_dest_deriv_channels; 4970 switch (instr->sampler_dim) { 4971 case GLSL_SAMPLER_DIM_3D: 4972 case GLSL_SAMPLER_DIM_CUBE: 4973 num_deriv_comp = 3; 4974 num_src_deriv_channels = 3; 4975 num_dest_deriv_channels = 3; 4976 break; 4977 case GLSL_SAMPLER_DIM_2D: 4978 default: 4979 num_src_deriv_channels = 2; 4980 num_dest_deriv_channels = 2; 4981 num_deriv_comp = 2; 4982 break; 4983 case GLSL_SAMPLER_DIM_1D: 4984 num_src_deriv_channels = 1; 4985 if (ctx->ac.chip_class >= GFX9) { 4986 num_dest_deriv_channels = 2; 4987 num_deriv_comp = 2; 4988 } else { 4989 num_dest_deriv_channels = 1; 4990 num_deriv_comp = 1; 4991 } 4992 break; 4993 } 4994 4995 for (unsigned i = 0; i < num_src_deriv_channels; i++) { 4996 derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i)); 4997 derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i)); 4998 } 4999 for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) { 5000 derivs[i] = ctx->ac.f32_0; 5001 derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0; 5002 } 5003 } 5004 5005 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) { 5006 for (chan = 0; chan < instr->coord_components; chan++) 5007 coords[chan] = ac_to_float(&ctx->ac, coords[chan]); 5008 if (instr->coord_components == 3) 5009 coords[3] = LLVMGetUndef(ctx->ac.f32); 5010 ac_prepare_cube_coords(&ctx->ac, 5011 instr->op == nir_texop_txd, instr->is_array, 5012 instr->op == nir_texop_lod, coords, derivs); 5013 if (num_deriv_comp) 5014 num_deriv_comp--; 5015 } 5016 5017 if (ddx || ddy) { 5018 for (unsigned i = 0; i < num_deriv_comp * 2; i++) 5019 address[count++] = derivs[i]; 5020 } 5021 5022 /* Pack texture coordinates */ 5023 if (coord) { 5024 address[count++] = coords[0]; 5025 if (instr->coord_components > 1) { 5026 if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) { 5027 coords[1] = apply_round_slice(&ctx->ac, coords[1]); 5028 } 5029 address[count++] = coords[1]; 5030 } 5031 if (instr->coord_components > 2) { 5032 /* This seems like a bit of a hack - but it passes Vulkan CTS with it */ 5033 if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D && 5034 instr->sampler_dim != GLSL_SAMPLER_DIM_CUBE && 5035 instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) { 5036 coords[2] = apply_round_slice(&ctx->ac, coords[2]); 5037 } 5038 address[count++] = coords[2]; 5039 } 5040 5041 if (ctx->ac.chip_class >= GFX9) { 5042 LLVMValueRef filler; 5043 if (instr->op == nir_texop_txf) 5044 filler = ctx->ac.i32_0; 5045 else 5046 filler = LLVMConstReal(ctx->ac.f32, 0.5); 5047 5048 if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D) { 5049 /* No nir_texop_lod, because it does not take a slice 5050 * even with array textures. */ 5051 if (instr->is_array && instr->op != nir_texop_lod ) { 5052 address[count] = address[count - 1]; 5053 address[count - 1] = filler; 5054 count++; 5055 } else 5056 address[count++] = filler; 5057 } 5058 } 5059 } 5060 5061 /* Pack LOD */ 5062 if (lod && ((instr->op == nir_texop_txl && !lod_is_zero) || 5063 instr->op == nir_texop_txf)) { 5064 address[count++] = lod; 5065 } else if (instr->op == nir_texop_txf_ms && sample_index) { 5066 address[count++] = sample_index; 5067 } else if(instr->op == nir_texop_txs) { 5068 count = 0; 5069 if (lod) 5070 address[count++] = lod; 5071 else 5072 address[count++] = ctx->ac.i32_0; 5073 } 5074 5075 for (chan = 0; chan < count; chan++) { 5076 address[chan] = LLVMBuildBitCast(ctx->ac.builder, 5077 address[chan], ctx->ac.i32, ""); 5078 } 5079 5080 if (instr->op == nir_texop_samples_identical) { 5081 LLVMValueRef txf_address[4]; 5082 struct ac_image_args txf_args = { 0 }; 5083 unsigned txf_count = count; 5084 memcpy(txf_address, address, sizeof(txf_address)); 5085 5086 if (!instr->is_array) 5087 txf_address[2] = ctx->ac.i32_0; 5088 txf_address[3] = ctx->ac.i32_0; 5089 5090 set_tex_fetch_args(&ctx->ac, &txf_args, instr, nir_texop_txf, 5091 fmask_ptr, NULL, 5092 txf_address, txf_count, 0xf); 5093 5094 result = build_tex_intrinsic(ctx, instr, false, &txf_args); 5095 5096 result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); 5097 result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0); 5098 goto write_result; 5099 } 5100 5101 if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS && 5102 instr->op != nir_texop_txs) { 5103 unsigned sample_chan = instr->is_array ? 3 : 2; 5104 address[sample_chan] = adjust_sample_index_using_fmask(&ctx->ac, 5105 address[0], 5106 address[1], 5107 instr->is_array ? address[2] : NULL, 5108 address[sample_chan], 5109 fmask_ptr); 5110 } 5111 5112 if (offsets && instr->op == nir_texop_txf) { 5113 nir_const_value *const_offset = 5114 nir_src_as_const_value(instr->src[const_src].src); 5115 int num_offsets = instr->src[const_src].src.ssa->num_components; 5116 assert(const_offset); 5117 num_offsets = MIN2(num_offsets, instr->coord_components); 5118 if (num_offsets > 2) 5119 address[2] = LLVMBuildAdd(ctx->ac.builder, 5120 address[2], LLVMConstInt(ctx->ac.i32, const_offset->i32[2], false), ""); 5121 if (num_offsets > 1) 5122 address[1] = LLVMBuildAdd(ctx->ac.builder, 5123 address[1], LLVMConstInt(ctx->ac.i32, const_offset->i32[1], false), ""); 5124 address[0] = LLVMBuildAdd(ctx->ac.builder, 5125 address[0], LLVMConstInt(ctx->ac.i32, const_offset->i32[0], false), ""); 5126 5127 } 5128 5129 /* TODO TG4 support */ 5130 if (instr->op == nir_texop_tg4) { 5131 if (instr->is_shadow) 5132 dmask = 1; 5133 else 5134 dmask = 1 << instr->component; 5135 } 5136 set_tex_fetch_args(&ctx->ac, &args, instr, instr->op, 5137 res_ptr, samp_ptr, address, count, dmask); 5138 5139 result = build_tex_intrinsic(ctx, instr, lod_is_zero, &args); 5140 5141 if (instr->op == nir_texop_query_levels) 5142 result = LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), ""); 5143 else if (instr->is_shadow && instr->is_new_style_shadow && 5144 instr->op != nir_texop_txs && instr->op != nir_texop_lod && 5145 instr->op != nir_texop_tg4) 5146 result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); 5147 else if (instr->op == nir_texop_txs && 5148 instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && 5149 instr->is_array) { 5150 LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false); 5151 LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false); 5152 LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, ""); 5153 z = LLVMBuildSDiv(ctx->ac.builder, z, six, ""); 5154 result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, ""); 5155 } else if (ctx->ac.chip_class >= GFX9 && 5156 instr->op == nir_texop_txs && 5157 instr->sampler_dim == GLSL_SAMPLER_DIM_1D && 5158 instr->is_array) { 5159 LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false); 5160 LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, ""); 5161 result = LLVMBuildInsertElement(ctx->ac.builder, result, layers, 5162 ctx->ac.i32_1, ""); 5163 } else if (instr->dest.ssa.num_components != 4) 5164 result = trim_vector(&ctx->ac, result, instr->dest.ssa.num_components); 5165 5166 write_result: 5167 if (result) { 5168 assert(instr->dest.is_ssa); 5169 result = ac_to_integer(&ctx->ac, result); 5170 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result); 5171 } 5172 } 5173 5174 5175 static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr) 5176 { 5177 LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa); 5178 LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, ""); 5179 5180 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result); 5181 _mesa_hash_table_insert(ctx->phis, instr, result); 5182 } 5183 5184 static void visit_post_phi(struct ac_nir_context *ctx, 5185 nir_phi_instr *instr, 5186 LLVMValueRef llvm_phi) 5187 { 5188 nir_foreach_phi_src(src, instr) { 5189 LLVMBasicBlockRef block = get_block(ctx, src->pred); 5190 LLVMValueRef llvm_src = get_src(ctx, src->src); 5191 5192 LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1); 5193 } 5194 } 5195 5196 static void phi_post_pass(struct ac_nir_context *ctx) 5197 { 5198 struct hash_entry *entry; 5199 hash_table_foreach(ctx->phis, entry) { 5200 visit_post_phi(ctx, (nir_phi_instr*)entry->key, 5201 (LLVMValueRef)entry->data); 5202 } 5203 } 5204 5205 5206 static void visit_ssa_undef(struct ac_nir_context *ctx, 5207 const nir_ssa_undef_instr *instr) 5208 { 5209 unsigned num_components = instr->def.num_components; 5210 LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size); 5211 LLVMValueRef undef; 5212 5213 if (num_components == 1) 5214 undef = LLVMGetUndef(type); 5215 else { 5216 undef = LLVMGetUndef(LLVMVectorType(type, num_components)); 5217 } 5218 _mesa_hash_table_insert(ctx->defs, &instr->def, undef); 5219 } 5220 5221 static void visit_jump(struct ac_llvm_context *ctx, 5222 const nir_jump_instr *instr) 5223 { 5224 switch (instr->type) { 5225 case nir_jump_break: 5226 ac_build_break(ctx); 5227 break; 5228 case nir_jump_continue: 5229 ac_build_continue(ctx); 5230 break; 5231 default: 5232 fprintf(stderr, "Unknown NIR jump instr: "); 5233 nir_print_instr(&instr->instr, stderr); 5234 fprintf(stderr, "\n"); 5235 abort(); 5236 } 5237 } 5238 5239 static void visit_cf_list(struct ac_nir_context *ctx, 5240 struct exec_list *list); 5241 5242 static void visit_block(struct ac_nir_context *ctx, nir_block *block) 5243 { 5244 LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->ac.builder); 5245 nir_foreach_instr(instr, block) 5246 { 5247 switch (instr->type) { 5248 case nir_instr_type_alu: 5249 visit_alu(ctx, nir_instr_as_alu(instr)); 5250 break; 5251 case nir_instr_type_load_const: 5252 visit_load_const(ctx, nir_instr_as_load_const(instr)); 5253 break; 5254 case nir_instr_type_intrinsic: 5255 visit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); 5256 break; 5257 case nir_instr_type_tex: 5258 visit_tex(ctx, nir_instr_as_tex(instr)); 5259 break; 5260 case nir_instr_type_phi: 5261 visit_phi(ctx, nir_instr_as_phi(instr)); 5262 break; 5263 case nir_instr_type_ssa_undef: 5264 visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr)); 5265 break; 5266 case nir_instr_type_jump: 5267 visit_jump(&ctx->ac, nir_instr_as_jump(instr)); 5268 break; 5269 default: 5270 fprintf(stderr, "Unknown NIR instr type: "); 5271 nir_print_instr(instr, stderr); 5272 fprintf(stderr, "\n"); 5273 abort(); 5274 } 5275 } 5276 5277 _mesa_hash_table_insert(ctx->defs, block, llvm_block); 5278 } 5279 5280 static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt) 5281 { 5282 LLVMValueRef value = get_src(ctx, if_stmt->condition); 5283 5284 nir_block *then_block = 5285 (nir_block *) exec_list_get_head(&if_stmt->then_list); 5286 5287 ac_build_uif(&ctx->ac, value, then_block->index); 5288 5289 visit_cf_list(ctx, &if_stmt->then_list); 5290 5291 if (!exec_list_is_empty(&if_stmt->else_list)) { 5292 nir_block *else_block = 5293 (nir_block *) exec_list_get_head(&if_stmt->else_list); 5294 5295 ac_build_else(&ctx->ac, else_block->index); 5296 visit_cf_list(ctx, &if_stmt->else_list); 5297 } 5298 5299 ac_build_endif(&ctx->ac, then_block->index); 5300 } 5301 5302 static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop) 5303 { 5304 nir_block *first_loop_block = 5305 (nir_block *) exec_list_get_head(&loop->body); 5306 5307 ac_build_bgnloop(&ctx->ac, first_loop_block->index); 5308 5309 visit_cf_list(ctx, &loop->body); 5310 5311 ac_build_endloop(&ctx->ac, first_loop_block->index); 5312 } 5313 5314 static void visit_cf_list(struct ac_nir_context *ctx, 5315 struct exec_list *list) 5316 { 5317 foreach_list_typed(nir_cf_node, node, node, list) 5318 { 5319 switch (node->type) { 5320 case nir_cf_node_block: 5321 visit_block(ctx, nir_cf_node_as_block(node)); 5322 break; 5323 5324 case nir_cf_node_if: 5325 visit_if(ctx, nir_cf_node_as_if(node)); 5326 break; 5327 5328 case nir_cf_node_loop: 5329 visit_loop(ctx, nir_cf_node_as_loop(node)); 5330 break; 5331 5332 default: 5333 assert(0); 5334 } 5335 } 5336 } 5337 5338 static void 5339 handle_vs_input_decl(struct nir_to_llvm_context *ctx, 5340 struct nir_variable *variable) 5341 { 5342 LLVMValueRef t_list_ptr = ctx->vertex_buffers; 5343 LLVMValueRef t_offset; 5344 LLVMValueRef t_list; 5345 LLVMValueRef input; 5346 LLVMValueRef buffer_index; 5347 int index = variable->data.location - VERT_ATTRIB_GENERIC0; 5348 int idx = variable->data.location; 5349 unsigned attrib_count = glsl_count_attribute_slots(variable->type, true); 5350 5351 variable->data.driver_location = idx * 4; 5352 5353 for (unsigned i = 0; i < attrib_count; ++i, ++idx) { 5354 if (ctx->options->key.vs.instance_rate_inputs & (1u << (index + i))) { 5355 buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.instance_id, 5356 ctx->abi.start_instance, ""); 5357 if (ctx->options->key.vs.as_ls) { 5358 ctx->shader_info->vs.vgpr_comp_cnt = 5359 MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt); 5360 } else { 5361 ctx->shader_info->vs.vgpr_comp_cnt = 5362 MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt); 5363 } 5364 } else 5365 buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.vertex_id, 5366 ctx->abi.base_vertex, ""); 5367 t_offset = LLVMConstInt(ctx->ac.i32, index + i, false); 5368 5369 t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset); 5370 5371 input = ac_build_buffer_load_format(&ctx->ac, t_list, 5372 buffer_index, 5373 ctx->ac.i32_0, 5374 true); 5375 5376 for (unsigned chan = 0; chan < 4; chan++) { 5377 LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); 5378 ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] = 5379 ac_to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->builder, 5380 input, llvm_chan, "")); 5381 } 5382 } 5383 } 5384 5385 static void interp_fs_input(struct nir_to_llvm_context *ctx, 5386 unsigned attr, 5387 LLVMValueRef interp_param, 5388 LLVMValueRef prim_mask, 5389 LLVMValueRef result[4]) 5390 { 5391 LLVMValueRef attr_number; 5392 unsigned chan; 5393 LLVMValueRef i, j; 5394 bool interp = interp_param != NULL; 5395 5396 attr_number = LLVMConstInt(ctx->ac.i32, attr, false); 5397 5398 /* fs.constant returns the param from the middle vertex, so it's not 5399 * really useful for flat shading. It's meant to be used for custom 5400 * interpolation (but the intrinsic can't fetch from the other two 5401 * vertices). 5402 * 5403 * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state 5404 * to do the right thing. The only reason we use fs.constant is that 5405 * fs.interp cannot be used on integers, because they can be equal 5406 * to NaN. 5407 */ 5408 if (interp) { 5409 interp_param = LLVMBuildBitCast(ctx->builder, interp_param, 5410 ctx->ac.v2f32, ""); 5411 5412 i = LLVMBuildExtractElement(ctx->builder, interp_param, 5413 ctx->ac.i32_0, ""); 5414 j = LLVMBuildExtractElement(ctx->builder, interp_param, 5415 ctx->ac.i32_1, ""); 5416 } 5417 5418 for (chan = 0; chan < 4; chan++) { 5419 LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); 5420 5421 if (interp) { 5422 result[chan] = ac_build_fs_interp(&ctx->ac, 5423 llvm_chan, 5424 attr_number, 5425 prim_mask, i, j); 5426 } else { 5427 result[chan] = ac_build_fs_interp_mov(&ctx->ac, 5428 LLVMConstInt(ctx->ac.i32, 2, false), 5429 llvm_chan, 5430 attr_number, 5431 prim_mask); 5432 } 5433 } 5434 } 5435 5436 static void 5437 handle_fs_input_decl(struct nir_to_llvm_context *ctx, 5438 struct nir_variable *variable) 5439 { 5440 int idx = variable->data.location; 5441 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); 5442 LLVMValueRef interp; 5443 5444 variable->data.driver_location = idx * 4; 5445 ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location; 5446 5447 if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) { 5448 unsigned interp_type; 5449 if (variable->data.sample) { 5450 interp_type = INTERP_SAMPLE; 5451 ctx->shader_info->info.ps.force_persample = true; 5452 } else if (variable->data.centroid) 5453 interp_type = INTERP_CENTROID; 5454 else 5455 interp_type = INTERP_CENTER; 5456 5457 interp = lookup_interp_param(ctx, variable->data.interpolation, interp_type); 5458 } else 5459 interp = NULL; 5460 5461 for (unsigned i = 0; i < attrib_count; ++i) 5462 ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp; 5463 5464 } 5465 5466 static void 5467 handle_vs_inputs(struct nir_to_llvm_context *ctx, 5468 struct nir_shader *nir) { 5469 nir_foreach_variable(variable, &nir->inputs) 5470 handle_vs_input_decl(ctx, variable); 5471 } 5472 5473 static void 5474 prepare_interp_optimize(struct nir_to_llvm_context *ctx, 5475 struct nir_shader *nir) 5476 { 5477 if (!ctx->options->key.fs.multisample) 5478 return; 5479 5480 bool uses_center = false; 5481 bool uses_centroid = false; 5482 nir_foreach_variable(variable, &nir->inputs) { 5483 if (glsl_get_base_type(glsl_without_array(variable->type)) != GLSL_TYPE_FLOAT || 5484 variable->data.sample) 5485 continue; 5486 5487 if (variable->data.centroid) 5488 uses_centroid = true; 5489 else 5490 uses_center = true; 5491 } 5492 5493 if (uses_center && uses_centroid) { 5494 LLVMValueRef sel = LLVMBuildICmp(ctx->builder, LLVMIntSLT, ctx->prim_mask, ctx->ac.i32_0, ""); 5495 ctx->persp_centroid = LLVMBuildSelect(ctx->builder, sel, ctx->persp_center, ctx->persp_centroid, ""); 5496 ctx->linear_centroid = LLVMBuildSelect(ctx->builder, sel, ctx->linear_center, ctx->linear_centroid, ""); 5497 } 5498 } 5499 5500 static void 5501 handle_fs_inputs(struct nir_to_llvm_context *ctx, 5502 struct nir_shader *nir) 5503 { 5504 prepare_interp_optimize(ctx, nir); 5505 5506 nir_foreach_variable(variable, &nir->inputs) 5507 handle_fs_input_decl(ctx, variable); 5508 5509 unsigned index = 0; 5510 5511 if (ctx->shader_info->info.ps.uses_input_attachments || 5512 ctx->shader_info->info.needs_multiview_view_index) 5513 ctx->input_mask |= 1ull << VARYING_SLOT_LAYER; 5514 5515 for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) { 5516 LLVMValueRef interp_param; 5517 LLVMValueRef *inputs = ctx->inputs +radeon_llvm_reg_index_soa(i, 0); 5518 5519 if (!(ctx->input_mask & (1ull << i))) 5520 continue; 5521 5522 if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC || 5523 i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) { 5524 interp_param = *inputs; 5525 interp_fs_input(ctx, index, interp_param, ctx->prim_mask, 5526 inputs); 5527 5528 if (!interp_param) 5529 ctx->shader_info->fs.flat_shaded_mask |= 1u << index; 5530 ++index; 5531 } else if (i == VARYING_SLOT_POS) { 5532 for(int i = 0; i < 3; ++i) 5533 inputs[i] = ctx->abi.frag_pos[i]; 5534 5535 inputs[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, 5536 ctx->abi.frag_pos[3]); 5537 } 5538 } 5539 ctx->shader_info->fs.num_interp = index; 5540 if (ctx->input_mask & (1 << VARYING_SLOT_PNTC)) 5541 ctx->shader_info->fs.has_pcoord = true; 5542 if (ctx->input_mask & (1 << VARYING_SLOT_PRIMITIVE_ID)) 5543 ctx->shader_info->fs.prim_id_input = true; 5544 if (ctx->input_mask & (1 << VARYING_SLOT_LAYER)) 5545 ctx->shader_info->fs.layer_input = true; 5546 ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0; 5547 5548 if (ctx->shader_info->info.needs_multiview_view_index) 5549 ctx->view_index = ctx->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]; 5550 } 5551 5552 static LLVMValueRef 5553 ac_build_alloca(struct ac_llvm_context *ac, 5554 LLVMTypeRef type, 5555 const char *name) 5556 { 5557 LLVMBuilderRef builder = ac->builder; 5558 LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder); 5559 LLVMValueRef function = LLVMGetBasicBlockParent(current_block); 5560 LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function); 5561 LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block); 5562 LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ac->context); 5563 LLVMValueRef res; 5564 5565 if (first_instr) { 5566 LLVMPositionBuilderBefore(first_builder, first_instr); 5567 } else { 5568 LLVMPositionBuilderAtEnd(first_builder, first_block); 5569 } 5570 5571 res = LLVMBuildAlloca(first_builder, type, name); 5572 LLVMBuildStore(builder, LLVMConstNull(type), res); 5573 5574 LLVMDisposeBuilder(first_builder); 5575 5576 return res; 5577 } 5578 5579 static LLVMValueRef si_build_alloca_undef(struct ac_llvm_context *ac, 5580 LLVMTypeRef type, 5581 const char *name) 5582 { 5583 LLVMValueRef ptr = ac_build_alloca(ac, type, name); 5584 LLVMBuildStore(ac->builder, LLVMGetUndef(type), ptr); 5585 return ptr; 5586 } 5587 5588 static void 5589 scan_shader_output_decl(struct nir_to_llvm_context *ctx, 5590 struct nir_variable *variable, 5591 struct nir_shader *shader, 5592 gl_shader_stage stage) 5593 { 5594 int idx = variable->data.location + variable->data.index; 5595 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); 5596 uint64_t mask_attribs; 5597 5598 variable->data.driver_location = idx * 4; 5599 5600 /* tess ctrl has it's own load/store paths for outputs */ 5601 if (stage == MESA_SHADER_TESS_CTRL) 5602 return; 5603 5604 mask_attribs = ((1ull << attrib_count) - 1) << idx; 5605 if (stage == MESA_SHADER_VERTEX || 5606 stage == MESA_SHADER_TESS_EVAL || 5607 stage == MESA_SHADER_GEOMETRY) { 5608 if (idx == VARYING_SLOT_CLIP_DIST0) { 5609 int length = shader->info.clip_distance_array_size + 5610 shader->info.cull_distance_array_size; 5611 if (stage == MESA_SHADER_VERTEX) { 5612 ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1; 5613 ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1; 5614 } 5615 if (stage == MESA_SHADER_TESS_EVAL) { 5616 ctx->shader_info->tes.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1; 5617 ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1; 5618 } 5619 5620 if (length > 4) 5621 attrib_count = 2; 5622 else 5623 attrib_count = 1; 5624 mask_attribs = 1ull << idx; 5625 } 5626 } 5627 5628 ctx->output_mask |= mask_attribs; 5629 } 5630 5631 static void 5632 handle_shader_output_decl(struct ac_nir_context *ctx, 5633 struct nir_shader *nir, 5634 struct nir_variable *variable) 5635 { 5636 unsigned output_loc = variable->data.driver_location / 4; 5637 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); 5638 5639 /* tess ctrl has it's own load/store paths for outputs */ 5640 if (ctx->stage == MESA_SHADER_TESS_CTRL) 5641 return; 5642 5643 if (ctx->stage == MESA_SHADER_VERTEX || 5644 ctx->stage == MESA_SHADER_TESS_EVAL || 5645 ctx->stage == MESA_SHADER_GEOMETRY) { 5646 int idx = variable->data.location + variable->data.index; 5647 if (idx == VARYING_SLOT_CLIP_DIST0) { 5648 int length = nir->info.clip_distance_array_size + 5649 nir->info.cull_distance_array_size; 5650 5651 if (length > 4) 5652 attrib_count = 2; 5653 else 5654 attrib_count = 1; 5655 } 5656 } 5657 5658 for (unsigned i = 0; i < attrib_count; ++i) { 5659 for (unsigned chan = 0; chan < 4; chan++) { 5660 ctx->outputs[radeon_llvm_reg_index_soa(output_loc + i, chan)] = 5661 si_build_alloca_undef(&ctx->ac, ctx->ac.f32, ""); 5662 } 5663 } 5664 } 5665 5666 static LLVMTypeRef 5667 glsl_base_to_llvm_type(struct nir_to_llvm_context *ctx, 5668 enum glsl_base_type type) 5669 { 5670 switch (type) { 5671 case GLSL_TYPE_INT: 5672 case GLSL_TYPE_UINT: 5673 case GLSL_TYPE_BOOL: 5674 case GLSL_TYPE_SUBROUTINE: 5675 return ctx->ac.i32; 5676 case GLSL_TYPE_FLOAT: /* TODO handle mediump */ 5677 return ctx->ac.f32; 5678 case GLSL_TYPE_INT64: 5679 case GLSL_TYPE_UINT64: 5680 return ctx->ac.i64; 5681 case GLSL_TYPE_DOUBLE: 5682 return ctx->ac.f64; 5683 default: 5684 unreachable("unknown GLSL type"); 5685 } 5686 } 5687 5688 static LLVMTypeRef 5689 glsl_to_llvm_type(struct nir_to_llvm_context *ctx, 5690 const struct glsl_type *type) 5691 { 5692 if (glsl_type_is_scalar(type)) { 5693 return glsl_base_to_llvm_type(ctx, glsl_get_base_type(type)); 5694 } 5695 5696 if (glsl_type_is_vector(type)) { 5697 return LLVMVectorType( 5698 glsl_base_to_llvm_type(ctx, glsl_get_base_type(type)), 5699 glsl_get_vector_elements(type)); 5700 } 5701 5702 if (glsl_type_is_matrix(type)) { 5703 return LLVMArrayType( 5704 glsl_to_llvm_type(ctx, glsl_get_column_type(type)), 5705 glsl_get_matrix_columns(type)); 5706 } 5707 5708 if (glsl_type_is_array(type)) { 5709 return LLVMArrayType( 5710 glsl_to_llvm_type(ctx, glsl_get_array_element(type)), 5711 glsl_get_length(type)); 5712 } 5713 5714 assert(glsl_type_is_struct(type)); 5715 5716 LLVMTypeRef member_types[glsl_get_length(type)]; 5717 5718 for (unsigned i = 0; i < glsl_get_length(type); i++) { 5719 member_types[i] = 5720 glsl_to_llvm_type(ctx, 5721 glsl_get_struct_field(type, i)); 5722 } 5723 5724 return LLVMStructTypeInContext(ctx->context, member_types, 5725 glsl_get_length(type), false); 5726 } 5727 5728 static void 5729 setup_locals(struct ac_nir_context *ctx, 5730 struct nir_function *func) 5731 { 5732 int i, j; 5733 ctx->num_locals = 0; 5734 nir_foreach_variable(variable, &func->impl->locals) { 5735 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); 5736 variable->data.driver_location = ctx->num_locals * 4; 5737 variable->data.location_frac = 0; 5738 ctx->num_locals += attrib_count; 5739 } 5740 ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef)); 5741 if (!ctx->locals) 5742 return; 5743 5744 for (i = 0; i < ctx->num_locals; i++) { 5745 for (j = 0; j < 4; j++) { 5746 ctx->locals[i * 4 + j] = 5747 si_build_alloca_undef(&ctx->ac, ctx->ac.f32, "temp"); 5748 } 5749 } 5750 } 5751 5752 static void 5753 setup_shared(struct ac_nir_context *ctx, 5754 struct nir_shader *nir) 5755 { 5756 nir_foreach_variable(variable, &nir->shared) { 5757 LLVMValueRef shared = 5758 LLVMAddGlobalInAddressSpace( 5759 ctx->ac.module, glsl_to_llvm_type(ctx->nctx, variable->type), 5760 variable->name ? variable->name : "", 5761 LOCAL_ADDR_SPACE); 5762 _mesa_hash_table_insert(ctx->vars, variable, shared); 5763 } 5764 } 5765 5766 static LLVMValueRef 5767 emit_float_saturate(struct ac_llvm_context *ctx, LLVMValueRef v, float lo, float hi) 5768 { 5769 v = ac_to_float(ctx, v); 5770 v = emit_intrin_2f_param(ctx, "llvm.maxnum", ctx->f32, v, LLVMConstReal(ctx->f32, lo)); 5771 return emit_intrin_2f_param(ctx, "llvm.minnum", ctx->f32, v, LLVMConstReal(ctx->f32, hi)); 5772 } 5773 5774 5775 static LLVMValueRef emit_pack_int16(struct nir_to_llvm_context *ctx, 5776 LLVMValueRef src0, LLVMValueRef src1) 5777 { 5778 LLVMValueRef const16 = LLVMConstInt(ctx->ac.i32, 16, false); 5779 LLVMValueRef comp[2]; 5780 5781 comp[0] = LLVMBuildAnd(ctx->builder, src0, LLVMConstInt(ctx->ac.i32, 65535, 0), ""); 5782 comp[1] = LLVMBuildAnd(ctx->builder, src1, LLVMConstInt(ctx->ac.i32, 65535, 0), ""); 5783 comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, ""); 5784 return LLVMBuildOr(ctx->builder, comp[0], comp[1], ""); 5785 } 5786 5787 /* Initialize arguments for the shader export intrinsic */ 5788 static void 5789 si_llvm_init_export_args(struct nir_to_llvm_context *ctx, 5790 LLVMValueRef *values, 5791 unsigned target, 5792 struct ac_export_args *args) 5793 { 5794 /* Default is 0xf. Adjusted below depending on the format. */ 5795 args->enabled_channels = 0xf; 5796 5797 /* Specify whether the EXEC mask represents the valid mask */ 5798 args->valid_mask = 0; 5799 5800 /* Specify whether this is the last export */ 5801 args->done = 0; 5802 5803 /* Specify the target we are exporting */ 5804 args->target = target; 5805 5806 args->compr = false; 5807 args->out[0] = LLVMGetUndef(ctx->ac.f32); 5808 args->out[1] = LLVMGetUndef(ctx->ac.f32); 5809 args->out[2] = LLVMGetUndef(ctx->ac.f32); 5810 args->out[3] = LLVMGetUndef(ctx->ac.f32); 5811 5812 if (!values) 5813 return; 5814 5815 if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) { 5816 LLVMValueRef val[4]; 5817 unsigned index = target - V_008DFC_SQ_EXP_MRT; 5818 unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf; 5819 bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1; 5820 bool is_int10 = (ctx->options->key.fs.is_int10 >> index) & 1; 5821 5822 switch(col_format) { 5823 case V_028714_SPI_SHADER_ZERO: 5824 args->enabled_channels = 0; /* writemask */ 5825 args->target = V_008DFC_SQ_EXP_NULL; 5826 break; 5827 5828 case V_028714_SPI_SHADER_32_R: 5829 args->enabled_channels = 1; 5830 args->out[0] = values[0]; 5831 break; 5832 5833 case V_028714_SPI_SHADER_32_GR: 5834 args->enabled_channels = 0x3; 5835 args->out[0] = values[0]; 5836 args->out[1] = values[1]; 5837 break; 5838 5839 case V_028714_SPI_SHADER_32_AR: 5840 args->enabled_channels = 0x9; 5841 args->out[0] = values[0]; 5842 args->out[3] = values[3]; 5843 break; 5844 5845 case V_028714_SPI_SHADER_FP16_ABGR: 5846 args->compr = 1; 5847 5848 for (unsigned chan = 0; chan < 2; chan++) { 5849 LLVMValueRef pack_args[2] = { 5850 values[2 * chan], 5851 values[2 * chan + 1] 5852 }; 5853 LLVMValueRef packed; 5854 5855 packed = ac_build_cvt_pkrtz_f16(&ctx->ac, pack_args); 5856 args->out[chan] = packed; 5857 } 5858 break; 5859 5860 case V_028714_SPI_SHADER_UNORM16_ABGR: 5861 for (unsigned chan = 0; chan < 4; chan++) { 5862 val[chan] = ac_build_clamp(&ctx->ac, values[chan]); 5863 val[chan] = LLVMBuildFMul(ctx->builder, val[chan], 5864 LLVMConstReal(ctx->ac.f32, 65535), ""); 5865 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan], 5866 LLVMConstReal(ctx->ac.f32, 0.5), ""); 5867 val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan], 5868 ctx->ac.i32, ""); 5869 } 5870 5871 args->compr = 1; 5872 args->out[0] = emit_pack_int16(ctx, val[0], val[1]); 5873 args->out[1] = emit_pack_int16(ctx, val[2], val[3]); 5874 break; 5875 5876 case V_028714_SPI_SHADER_SNORM16_ABGR: 5877 for (unsigned chan = 0; chan < 4; chan++) { 5878 val[chan] = emit_float_saturate(&ctx->ac, values[chan], -1, 1); 5879 val[chan] = LLVMBuildFMul(ctx->builder, val[chan], 5880 LLVMConstReal(ctx->ac.f32, 32767), ""); 5881 5882 /* If positive, add 0.5, else add -0.5. */ 5883 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan], 5884 LLVMBuildSelect(ctx->builder, 5885 LLVMBuildFCmp(ctx->builder, LLVMRealOGE, 5886 val[chan], ctx->ac.f32_0, ""), 5887 LLVMConstReal(ctx->ac.f32, 0.5), 5888 LLVMConstReal(ctx->ac.f32, -0.5), ""), ""); 5889 val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->ac.i32, ""); 5890 } 5891 5892 args->compr = 1; 5893 args->out[0] = emit_pack_int16(ctx, val[0], val[1]); 5894 args->out[1] = emit_pack_int16(ctx, val[2], val[3]); 5895 break; 5896 5897 case V_028714_SPI_SHADER_UINT16_ABGR: { 5898 LLVMValueRef max_rgb = LLVMConstInt(ctx->ac.i32, 5899 is_int8 ? 255 : is_int10 ? 1023 : 65535, 0); 5900 LLVMValueRef max_alpha = !is_int10 ? max_rgb : LLVMConstInt(ctx->ac.i32, 3, 0); 5901 5902 for (unsigned chan = 0; chan < 4; chan++) { 5903 val[chan] = ac_to_integer(&ctx->ac, values[chan]); 5904 val[chan] = emit_minmax_int(&ctx->ac, LLVMIntULT, val[chan], chan == 3 ? max_alpha : max_rgb); 5905 } 5906 5907 args->compr = 1; 5908 args->out[0] = emit_pack_int16(ctx, val[0], val[1]); 5909 args->out[1] = emit_pack_int16(ctx, val[2], val[3]); 5910 break; 5911 } 5912 5913 case V_028714_SPI_SHADER_SINT16_ABGR: { 5914 LLVMValueRef max_rgb = LLVMConstInt(ctx->ac.i32, 5915 is_int8 ? 127 : is_int10 ? 511 : 32767, 0); 5916 LLVMValueRef min_rgb = LLVMConstInt(ctx->ac.i32, 5917 is_int8 ? -128 : is_int10 ? -512 : -32768, 0); 5918 LLVMValueRef max_alpha = !is_int10 ? max_rgb : ctx->ac.i32_1; 5919 LLVMValueRef min_alpha = !is_int10 ? min_rgb : LLVMConstInt(ctx->ac.i32, -2, 0); 5920 5921 /* Clamp. */ 5922 for (unsigned chan = 0; chan < 4; chan++) { 5923 val[chan] = ac_to_integer(&ctx->ac, values[chan]); 5924 val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSLT, val[chan], chan == 3 ? max_alpha : max_rgb); 5925 val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSGT, val[chan], chan == 3 ? min_alpha : min_rgb); 5926 } 5927 5928 args->compr = 1; 5929 args->out[0] = emit_pack_int16(ctx, val[0], val[1]); 5930 args->out[1] = emit_pack_int16(ctx, val[2], val[3]); 5931 break; 5932 } 5933 5934 default: 5935 case V_028714_SPI_SHADER_32_ABGR: 5936 memcpy(&args->out[0], values, sizeof(values[0]) * 4); 5937 break; 5938 } 5939 } else 5940 memcpy(&args->out[0], values, sizeof(values[0]) * 4); 5941 5942 for (unsigned i = 0; i < 4; ++i) 5943 args->out[i] = ac_to_float(&ctx->ac, args->out[i]); 5944 } 5945 5946 static void 5947 handle_vs_outputs_post(struct nir_to_llvm_context *ctx, 5948 bool export_prim_id, 5949 struct ac_vs_output_info *outinfo) 5950 { 5951 uint32_t param_count = 0; 5952 unsigned target; 5953 unsigned pos_idx, num_pos_exports = 0; 5954 struct ac_export_args args, pos_args[4] = {}; 5955 LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_index_value = NULL; 5956 int i; 5957 5958 if (ctx->options->key.has_multiview_view_index) { 5959 LLVMValueRef* tmp_out = &ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]; 5960 if(!*tmp_out) { 5961 for(unsigned i = 0; i < 4; ++i) 5962 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, i)] = 5963 si_build_alloca_undef(&ctx->ac, ctx->ac.f32, ""); 5964 } 5965 5966 LLVMBuildStore(ctx->builder, ac_to_float(&ctx->ac, ctx->view_index), *tmp_out); 5967 ctx->output_mask |= 1ull << VARYING_SLOT_LAYER; 5968 } 5969 5970 memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED, 5971 sizeof(outinfo->vs_output_param_offset)); 5972 5973 if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) { 5974 LLVMValueRef slots[8]; 5975 unsigned j; 5976 5977 if (outinfo->cull_dist_mask) 5978 outinfo->cull_dist_mask <<= ctx->num_output_clips; 5979 5980 i = VARYING_SLOT_CLIP_DIST0; 5981 for (j = 0; j < ctx->num_output_clips + ctx->num_output_culls; j++) 5982 slots[j] = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, 5983 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], "")); 5984 5985 for (i = ctx->num_output_clips + ctx->num_output_culls; i < 8; i++) 5986 slots[i] = LLVMGetUndef(ctx->ac.f32); 5987 5988 if (ctx->num_output_clips + ctx->num_output_culls > 4) { 5989 target = V_008DFC_SQ_EXP_POS + 3; 5990 si_llvm_init_export_args(ctx, &slots[4], target, &args); 5991 memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], 5992 &args, sizeof(args)); 5993 } 5994 5995 target = V_008DFC_SQ_EXP_POS + 2; 5996 si_llvm_init_export_args(ctx, &slots[0], target, &args); 5997 memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], 5998 &args, sizeof(args)); 5999 6000 } 6001 6002 LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_1}; 6003 if (ctx->output_mask & (1ull << VARYING_SLOT_POS)) { 6004 for (unsigned j = 0; j < 4; j++) 6005 pos_values[j] = LLVMBuildLoad(ctx->builder, 6006 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_POS, j)], ""); 6007 } 6008 si_llvm_init_export_args(ctx, pos_values, V_008DFC_SQ_EXP_POS, &pos_args[0]); 6009 6010 if (ctx->output_mask & (1ull << VARYING_SLOT_PSIZ)) { 6011 outinfo->writes_pointsize = true; 6012 psize_value = LLVMBuildLoad(ctx->builder, 6013 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_PSIZ, 0)], ""); 6014 } 6015 6016 if (ctx->output_mask & (1ull << VARYING_SLOT_LAYER)) { 6017 outinfo->writes_layer = true; 6018 layer_value = LLVMBuildLoad(ctx->builder, 6019 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)], ""); 6020 } 6021 6022 if (ctx->output_mask & (1ull << VARYING_SLOT_VIEWPORT)) { 6023 outinfo->writes_viewport_index = true; 6024 viewport_index_value = LLVMBuildLoad(ctx->builder, 6025 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_VIEWPORT, 0)], ""); 6026 } 6027 6028 if (outinfo->writes_pointsize || 6029 outinfo->writes_layer || 6030 outinfo->writes_viewport_index) { 6031 pos_args[1].enabled_channels = ((outinfo->writes_pointsize == true ? 1 : 0) | 6032 (outinfo->writes_layer == true ? 4 : 0)); 6033 pos_args[1].valid_mask = 0; 6034 pos_args[1].done = 0; 6035 pos_args[1].target = V_008DFC_SQ_EXP_POS + 1; 6036 pos_args[1].compr = 0; 6037 pos_args[1].out[0] = ctx->ac.f32_0; /* X */ 6038 pos_args[1].out[1] = ctx->ac.f32_0; /* Y */ 6039 pos_args[1].out[2] = ctx->ac.f32_0; /* Z */ 6040 pos_args[1].out[3] = ctx->ac.f32_0; /* W */ 6041 6042 if (outinfo->writes_pointsize == true) 6043 pos_args[1].out[0] = psize_value; 6044 if (outinfo->writes_layer == true) 6045 pos_args[1].out[2] = layer_value; 6046 if (outinfo->writes_viewport_index == true) { 6047 if (ctx->options->chip_class >= GFX9) { 6048 /* GFX9 has the layer in out.z[10:0] and the viewport 6049 * index in out.z[19:16]. 6050 */ 6051 LLVMValueRef v = viewport_index_value; 6052 v = ac_to_integer(&ctx->ac, v); 6053 v = LLVMBuildShl(ctx->builder, v, 6054 LLVMConstInt(ctx->ac.i32, 16, false), 6055 ""); 6056 v = LLVMBuildOr(ctx->builder, v, 6057 ac_to_integer(&ctx->ac, pos_args[1].out[2]), ""); 6058 6059 pos_args[1].out[2] = ac_to_float(&ctx->ac, v); 6060 pos_args[1].enabled_channels |= 1 << 2; 6061 } else { 6062 pos_args[1].out[3] = viewport_index_value; 6063 pos_args[1].enabled_channels |= 1 << 3; 6064 } 6065 } 6066 } 6067 for (i = 0; i < 4; i++) { 6068 if (pos_args[i].out[0]) 6069 num_pos_exports++; 6070 } 6071 6072 pos_idx = 0; 6073 for (i = 0; i < 4; i++) { 6074 if (!pos_args[i].out[0]) 6075 continue; 6076 6077 /* Specify the target we are exporting */ 6078 pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++; 6079 if (pos_idx == num_pos_exports) 6080 pos_args[i].done = 1; 6081 ac_build_export(&ctx->ac, &pos_args[i]); 6082 } 6083 6084 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { 6085 LLVMValueRef values[4]; 6086 if (!(ctx->output_mask & (1ull << i))) 6087 continue; 6088 6089 for (unsigned j = 0; j < 4; j++) 6090 values[j] = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, 6091 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], "")); 6092 6093 if (i == VARYING_SLOT_LAYER) { 6094 target = V_008DFC_SQ_EXP_PARAM + param_count; 6095 outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = param_count; 6096 param_count++; 6097 } else if (i == VARYING_SLOT_PRIMITIVE_ID) { 6098 target = V_008DFC_SQ_EXP_PARAM + param_count; 6099 outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count; 6100 param_count++; 6101 } else if (i >= VARYING_SLOT_VAR0) { 6102 outinfo->export_mask |= 1u << (i - VARYING_SLOT_VAR0); 6103 target = V_008DFC_SQ_EXP_PARAM + param_count; 6104 outinfo->vs_output_param_offset[i] = param_count; 6105 param_count++; 6106 } else 6107 continue; 6108 6109 si_llvm_init_export_args(ctx, values, target, &args); 6110 6111 if (target >= V_008DFC_SQ_EXP_POS && 6112 target <= (V_008DFC_SQ_EXP_POS + 3)) { 6113 memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], 6114 &args, sizeof(args)); 6115 } else { 6116 ac_build_export(&ctx->ac, &args); 6117 } 6118 } 6119 6120 if (export_prim_id) { 6121 LLVMValueRef values[4]; 6122 target = V_008DFC_SQ_EXP_PARAM + param_count; 6123 outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count; 6124 param_count++; 6125 6126 values[0] = ctx->vs_prim_id; 6127 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(2, 6128 ctx->shader_info->vs.vgpr_comp_cnt); 6129 for (unsigned j = 1; j < 4; j++) 6130 values[j] = ctx->ac.f32_0; 6131 si_llvm_init_export_args(ctx, values, target, &args); 6132 ac_build_export(&ctx->ac, &args); 6133 outinfo->export_prim_id = true; 6134 } 6135 6136 outinfo->pos_exports = num_pos_exports; 6137 outinfo->param_exports = param_count; 6138 } 6139 6140 static void 6141 handle_es_outputs_post(struct nir_to_llvm_context *ctx, 6142 struct ac_es_output_info *outinfo) 6143 { 6144 int j; 6145 uint64_t max_output_written = 0; 6146 LLVMValueRef lds_base = NULL; 6147 6148 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { 6149 int param_index; 6150 int length = 4; 6151 6152 if (!(ctx->output_mask & (1ull << i))) 6153 continue; 6154 6155 if (i == VARYING_SLOT_CLIP_DIST0) 6156 length = ctx->num_output_clips + ctx->num_output_culls; 6157 6158 param_index = shader_io_get_unique_index(i); 6159 6160 max_output_written = MAX2(param_index + (length > 4), max_output_written); 6161 } 6162 6163 outinfo->esgs_itemsize = (max_output_written + 1) * 16; 6164 6165 if (ctx->ac.chip_class >= GFX9) { 6166 unsigned itemsize_dw = outinfo->esgs_itemsize / 4; 6167 LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac); 6168 LLVMValueRef wave_idx = ac_build_bfe(&ctx->ac, ctx->merged_wave_info, 6169 LLVMConstInt(ctx->ac.i32, 24, false), 6170 LLVMConstInt(ctx->ac.i32, 4, false), false); 6171 vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx, 6172 LLVMBuildMul(ctx->ac.builder, wave_idx, 6173 LLVMConstInt(ctx->ac.i32, 64, false), ""), ""); 6174 lds_base = LLVMBuildMul(ctx->ac.builder, vertex_idx, 6175 LLVMConstInt(ctx->ac.i32, itemsize_dw, 0), ""); 6176 } 6177 6178 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { 6179 LLVMValueRef dw_addr = NULL; 6180 LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4]; 6181 int param_index; 6182 int length = 4; 6183 6184 if (!(ctx->output_mask & (1ull << i))) 6185 continue; 6186 6187 if (i == VARYING_SLOT_CLIP_DIST0) 6188 length = ctx->num_output_clips + ctx->num_output_culls; 6189 6190 param_index = shader_io_get_unique_index(i); 6191 6192 if (lds_base) { 6193 dw_addr = LLVMBuildAdd(ctx->builder, lds_base, 6194 LLVMConstInt(ctx->ac.i32, param_index * 4, false), 6195 ""); 6196 } 6197 for (j = 0; j < length; j++) { 6198 LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, out_ptr[j], ""); 6199 out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->ac.i32, ""); 6200 6201 if (ctx->ac.chip_class >= GFX9) { 6202 ac_lds_store(&ctx->ac, dw_addr, 6203 LLVMBuildLoad(ctx->builder, out_ptr[j], "")); 6204 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->ac.i32_1, ""); 6205 } else { 6206 ac_build_buffer_store_dword(&ctx->ac, 6207 ctx->esgs_ring, 6208 out_val, 1, 6209 NULL, ctx->es2gs_offset, 6210 (4 * param_index + j) * 4, 6211 1, 1, true, true); 6212 } 6213 } 6214 } 6215 } 6216 6217 static void 6218 handle_ls_outputs_post(struct nir_to_llvm_context *ctx) 6219 { 6220 LLVMValueRef vertex_id = ctx->rel_auto_id; 6221 LLVMValueRef vertex_dw_stride = unpack_param(&ctx->ac, ctx->ls_out_layout, 13, 8); 6222 LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->builder, vertex_id, 6223 vertex_dw_stride, ""); 6224 6225 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { 6226 LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4]; 6227 int length = 4; 6228 6229 if (!(ctx->output_mask & (1ull << i))) 6230 continue; 6231 6232 if (i == VARYING_SLOT_CLIP_DIST0) 6233 length = ctx->num_output_clips + ctx->num_output_culls; 6234 int param = shader_io_get_unique_index(i); 6235 mark_tess_output(ctx, false, param, 1); 6236 if (length > 4) 6237 mark_tess_output(ctx, false, param + 1, 1); 6238 LLVMValueRef dw_addr = LLVMBuildAdd(ctx->builder, base_dw_addr, 6239 LLVMConstInt(ctx->ac.i32, param * 4, false), 6240 ""); 6241 for (unsigned j = 0; j < length; j++) { 6242 ac_lds_store(&ctx->ac, dw_addr, 6243 LLVMBuildLoad(ctx->builder, out_ptr[j], "")); 6244 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->ac.i32_1, ""); 6245 } 6246 } 6247 } 6248 6249 struct ac_build_if_state 6250 { 6251 struct nir_to_llvm_context *ctx; 6252 LLVMValueRef condition; 6253 LLVMBasicBlockRef entry_block; 6254 LLVMBasicBlockRef true_block; 6255 LLVMBasicBlockRef false_block; 6256 LLVMBasicBlockRef merge_block; 6257 }; 6258 6259 static LLVMBasicBlockRef 6260 ac_build_insert_new_block(struct nir_to_llvm_context *ctx, const char *name) 6261 { 6262 LLVMBasicBlockRef current_block; 6263 LLVMBasicBlockRef next_block; 6264 LLVMBasicBlockRef new_block; 6265 6266 /* get current basic block */ 6267 current_block = LLVMGetInsertBlock(ctx->builder); 6268 6269 /* chqeck if there's another block after this one */ 6270 next_block = LLVMGetNextBasicBlock(current_block); 6271 if (next_block) { 6272 /* insert the new block before the next block */ 6273 new_block = LLVMInsertBasicBlockInContext(ctx->context, next_block, name); 6274 } 6275 else { 6276 /* append new block after current block */ 6277 LLVMValueRef function = LLVMGetBasicBlockParent(current_block); 6278 new_block = LLVMAppendBasicBlockInContext(ctx->context, function, name); 6279 } 6280 return new_block; 6281 } 6282 6283 static void 6284 ac_nir_build_if(struct ac_build_if_state *ifthen, 6285 struct nir_to_llvm_context *ctx, 6286 LLVMValueRef condition) 6287 { 6288 LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->builder); 6289 6290 memset(ifthen, 0, sizeof *ifthen); 6291 ifthen->ctx = ctx; 6292 ifthen->condition = condition; 6293 ifthen->entry_block = block; 6294 6295 /* create endif/merge basic block for the phi functions */ 6296 ifthen->merge_block = ac_build_insert_new_block(ctx, "endif-block"); 6297 6298 /* create/insert true_block before merge_block */ 6299 ifthen->true_block = 6300 LLVMInsertBasicBlockInContext(ctx->context, 6301 ifthen->merge_block, 6302 "if-true-block"); 6303 6304 /* successive code goes into the true block */ 6305 LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block); 6306 } 6307 6308 /** 6309 * End a conditional. 6310 */ 6311 static void 6312 ac_nir_build_endif(struct ac_build_if_state *ifthen) 6313 { 6314 LLVMBuilderRef builder = ifthen->ctx->builder; 6315 6316 /* Insert branch to the merge block from current block */ 6317 LLVMBuildBr(builder, ifthen->merge_block); 6318 6319 /* 6320 * Now patch in the various branch instructions. 6321 */ 6322 6323 /* Insert the conditional branch instruction at the end of entry_block */ 6324 LLVMPositionBuilderAtEnd(builder, ifthen->entry_block); 6325 if (ifthen->false_block) { 6326 /* we have an else clause */ 6327 LLVMBuildCondBr(builder, ifthen->condition, 6328 ifthen->true_block, ifthen->false_block); 6329 } 6330 else { 6331 /* no else clause */ 6332 LLVMBuildCondBr(builder, ifthen->condition, 6333 ifthen->true_block, ifthen->merge_block); 6334 } 6335 6336 /* Resume building code at end of the ifthen->merge_block */ 6337 LLVMPositionBuilderAtEnd(builder, ifthen->merge_block); 6338 } 6339 6340 static void 6341 write_tess_factors(struct nir_to_llvm_context *ctx) 6342 { 6343 unsigned stride, outer_comps, inner_comps; 6344 struct ac_build_if_state if_ctx, inner_if_ctx; 6345 LLVMValueRef invocation_id = unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 8, 5); 6346 LLVMValueRef rel_patch_id = unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 0, 8); 6347 unsigned tess_inner_index, tess_outer_index; 6348 LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer; 6349 LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4]; 6350 int i; 6351 emit_barrier(&ctx->ac, ctx->stage); 6352 6353 switch (ctx->options->key.tcs.primitive_mode) { 6354 case GL_ISOLINES: 6355 stride = 2; 6356 outer_comps = 2; 6357 inner_comps = 0; 6358 break; 6359 case GL_TRIANGLES: 6360 stride = 4; 6361 outer_comps = 3; 6362 inner_comps = 1; 6363 break; 6364 case GL_QUADS: 6365 stride = 6; 6366 outer_comps = 4; 6367 inner_comps = 2; 6368 break; 6369 default: 6370 return; 6371 } 6372 6373 ac_nir_build_if(&if_ctx, ctx, 6374 LLVMBuildICmp(ctx->builder, LLVMIntEQ, 6375 invocation_id, ctx->ac.i32_0, "")); 6376 6377 tess_inner_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER); 6378 tess_outer_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER); 6379 6380 mark_tess_output(ctx, true, tess_inner_index, 1); 6381 mark_tess_output(ctx, true, tess_outer_index, 1); 6382 lds_base = get_tcs_out_current_patch_data_offset(ctx); 6383 lds_inner = LLVMBuildAdd(ctx->builder, lds_base, 6384 LLVMConstInt(ctx->ac.i32, tess_inner_index * 4, false), ""); 6385 lds_outer = LLVMBuildAdd(ctx->builder, lds_base, 6386 LLVMConstInt(ctx->ac.i32, tess_outer_index * 4, false), ""); 6387 6388 for (i = 0; i < 4; i++) { 6389 inner[i] = LLVMGetUndef(ctx->ac.i32); 6390 outer[i] = LLVMGetUndef(ctx->ac.i32); 6391 } 6392 6393 // LINES reverseal 6394 if (ctx->options->key.tcs.primitive_mode == GL_ISOLINES) { 6395 outer[0] = out[1] = ac_lds_load(&ctx->ac, lds_outer); 6396 lds_outer = LLVMBuildAdd(ctx->builder, lds_outer, 6397 ctx->ac.i32_1, ""); 6398 outer[1] = out[0] = ac_lds_load(&ctx->ac, lds_outer); 6399 } else { 6400 for (i = 0; i < outer_comps; i++) { 6401 outer[i] = out[i] = 6402 ac_lds_load(&ctx->ac, lds_outer); 6403 lds_outer = LLVMBuildAdd(ctx->builder, lds_outer, 6404 ctx->ac.i32_1, ""); 6405 } 6406 for (i = 0; i < inner_comps; i++) { 6407 inner[i] = out[outer_comps+i] = 6408 ac_lds_load(&ctx->ac, lds_inner); 6409 lds_inner = LLVMBuildAdd(ctx->builder, lds_inner, 6410 ctx->ac.i32_1, ""); 6411 } 6412 } 6413 6414 /* Convert the outputs to vectors for stores. */ 6415 vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4)); 6416 vec1 = NULL; 6417 6418 if (stride > 4) 6419 vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4); 6420 6421 6422 buffer = ctx->hs_ring_tess_factor; 6423 tf_base = ctx->tess_factor_offset; 6424 byteoffset = LLVMBuildMul(ctx->builder, rel_patch_id, 6425 LLVMConstInt(ctx->ac.i32, 4 * stride, false), ""); 6426 unsigned tf_offset = 0; 6427 6428 if (ctx->options->chip_class <= VI) { 6429 ac_nir_build_if(&inner_if_ctx, ctx, 6430 LLVMBuildICmp(ctx->builder, LLVMIntEQ, 6431 rel_patch_id, ctx->ac.i32_0, "")); 6432 6433 /* Store the dynamic HS control word. */ 6434 ac_build_buffer_store_dword(&ctx->ac, buffer, 6435 LLVMConstInt(ctx->ac.i32, 0x80000000, false), 6436 1, ctx->ac.i32_0, tf_base, 6437 0, 1, 0, true, false); 6438 tf_offset += 4; 6439 6440 ac_nir_build_endif(&inner_if_ctx); 6441 } 6442 6443 /* Store the tessellation factors. */ 6444 ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, 6445 MIN2(stride, 4), byteoffset, tf_base, 6446 tf_offset, 1, 0, true, false); 6447 if (vec1) 6448 ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, 6449 stride - 4, byteoffset, tf_base, 6450 16 + tf_offset, 1, 0, true, false); 6451 6452 //store to offchip for TES to read - only if TES reads them 6453 if (ctx->options->key.tcs.tes_reads_tess_factors) { 6454 LLVMValueRef inner_vec, outer_vec, tf_outer_offset; 6455 LLVMValueRef tf_inner_offset; 6456 unsigned param_outer, param_inner; 6457 6458 param_outer = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER); 6459 tf_outer_offset = get_tcs_tes_buffer_address(ctx, NULL, 6460 LLVMConstInt(ctx->ac.i32, param_outer, 0)); 6461 6462 outer_vec = ac_build_gather_values(&ctx->ac, outer, 6463 util_next_power_of_two(outer_comps)); 6464 6465 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, outer_vec, 6466 outer_comps, tf_outer_offset, 6467 ctx->oc_lds, 0, 1, 0, true, false); 6468 if (inner_comps) { 6469 param_inner = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER); 6470 tf_inner_offset = get_tcs_tes_buffer_address(ctx, NULL, 6471 LLVMConstInt(ctx->ac.i32, param_inner, 0)); 6472 6473 inner_vec = inner_comps == 1 ? inner[0] : 6474 ac_build_gather_values(&ctx->ac, inner, inner_comps); 6475 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, inner_vec, 6476 inner_comps, tf_inner_offset, 6477 ctx->oc_lds, 0, 1, 0, true, false); 6478 } 6479 } 6480 ac_nir_build_endif(&if_ctx); 6481 } 6482 6483 static void 6484 handle_tcs_outputs_post(struct nir_to_llvm_context *ctx) 6485 { 6486 write_tess_factors(ctx); 6487 } 6488 6489 static bool 6490 si_export_mrt_color(struct nir_to_llvm_context *ctx, 6491 LLVMValueRef *color, unsigned param, bool is_last, 6492 struct ac_export_args *args) 6493 { 6494 /* Export */ 6495 si_llvm_init_export_args(ctx, color, param, 6496 args); 6497 6498 if (is_last) { 6499 args->valid_mask = 1; /* whether the EXEC mask is valid */ 6500 args->done = 1; /* DONE bit */ 6501 } else if (!args->enabled_channels) 6502 return false; /* unnecessary NULL export */ 6503 6504 return true; 6505 } 6506 6507 static void 6508 radv_export_mrt_z(struct nir_to_llvm_context *ctx, 6509 LLVMValueRef depth, LLVMValueRef stencil, 6510 LLVMValueRef samplemask) 6511 { 6512 struct ac_export_args args; 6513 6514 ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args); 6515 6516 ac_build_export(&ctx->ac, &args); 6517 } 6518 6519 static void 6520 handle_fs_outputs_post(struct nir_to_llvm_context *ctx) 6521 { 6522 unsigned index = 0; 6523 LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL; 6524 struct ac_export_args color_args[8]; 6525 6526 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { 6527 LLVMValueRef values[4]; 6528 6529 if (!(ctx->output_mask & (1ull << i))) 6530 continue; 6531 6532 if (i == FRAG_RESULT_DEPTH) { 6533 ctx->shader_info->fs.writes_z = true; 6534 depth = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, 6535 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); 6536 } else if (i == FRAG_RESULT_STENCIL) { 6537 ctx->shader_info->fs.writes_stencil = true; 6538 stencil = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, 6539 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); 6540 } else if (i == FRAG_RESULT_SAMPLE_MASK) { 6541 ctx->shader_info->fs.writes_sample_mask = true; 6542 samplemask = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, 6543 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); 6544 } else { 6545 bool last = false; 6546 for (unsigned j = 0; j < 4; j++) 6547 values[j] = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, 6548 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], "")); 6549 6550 if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil && !ctx->shader_info->fs.writes_sample_mask) 6551 last = ctx->output_mask <= ((1ull << (i + 1)) - 1); 6552 6553 bool ret = si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + (i - FRAG_RESULT_DATA0), last, &color_args[index]); 6554 if (ret) 6555 index++; 6556 } 6557 } 6558 6559 for (unsigned i = 0; i < index; i++) 6560 ac_build_export(&ctx->ac, &color_args[i]); 6561 if (depth || stencil || samplemask) 6562 radv_export_mrt_z(ctx, depth, stencil, samplemask); 6563 else if (!index) { 6564 si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true, &color_args[0]); 6565 ac_build_export(&ctx->ac, &color_args[0]); 6566 } 6567 } 6568 6569 static void 6570 emit_gs_epilogue(struct nir_to_llvm_context *ctx) 6571 { 6572 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, ctx->gs_wave_id); 6573 } 6574 6575 static void 6576 handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs, 6577 LLVMValueRef *addrs) 6578 { 6579 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); 6580 6581 switch (ctx->stage) { 6582 case MESA_SHADER_VERTEX: 6583 if (ctx->options->key.vs.as_ls) 6584 handle_ls_outputs_post(ctx); 6585 else if (ctx->options->key.vs.as_es) 6586 handle_es_outputs_post(ctx, &ctx->shader_info->vs.es_info); 6587 else 6588 handle_vs_outputs_post(ctx, ctx->options->key.vs.export_prim_id, 6589 &ctx->shader_info->vs.outinfo); 6590 break; 6591 case MESA_SHADER_FRAGMENT: 6592 handle_fs_outputs_post(ctx); 6593 break; 6594 case MESA_SHADER_GEOMETRY: 6595 emit_gs_epilogue(ctx); 6596 break; 6597 case MESA_SHADER_TESS_CTRL: 6598 handle_tcs_outputs_post(ctx); 6599 break; 6600 case MESA_SHADER_TESS_EVAL: 6601 if (ctx->options->key.tes.as_es) 6602 handle_es_outputs_post(ctx, &ctx->shader_info->tes.es_info); 6603 else 6604 handle_vs_outputs_post(ctx, ctx->options->key.tes.export_prim_id, 6605 &ctx->shader_info->tes.outinfo); 6606 break; 6607 default: 6608 break; 6609 } 6610 } 6611 6612 static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx) 6613 { 6614 LLVMPassManagerRef passmgr; 6615 /* Create the pass manager */ 6616 passmgr = LLVMCreateFunctionPassManagerForModule( 6617 ctx->module); 6618 6619 /* This pass should eliminate all the load and store instructions */ 6620 LLVMAddPromoteMemoryToRegisterPass(passmgr); 6621 6622 /* Add some optimization passes */ 6623 LLVMAddScalarReplAggregatesPass(passmgr); 6624 LLVMAddLICMPass(passmgr); 6625 LLVMAddAggressiveDCEPass(passmgr); 6626 LLVMAddCFGSimplificationPass(passmgr); 6627 LLVMAddInstructionCombiningPass(passmgr); 6628 6629 /* Run the pass */ 6630 LLVMInitializeFunctionPassManager(passmgr); 6631 LLVMRunFunctionPassManager(passmgr, ctx->main_function); 6632 LLVMFinalizeFunctionPassManager(passmgr); 6633 6634 LLVMDisposeBuilder(ctx->builder); 6635 LLVMDisposePassManager(passmgr); 6636 6637 ac_llvm_context_dispose(&ctx->ac); 6638 } 6639 6640 static void 6641 ac_nir_eliminate_const_vs_outputs(struct nir_to_llvm_context *ctx) 6642 { 6643 struct ac_vs_output_info *outinfo; 6644 6645 switch (ctx->stage) { 6646 case MESA_SHADER_FRAGMENT: 6647 case MESA_SHADER_COMPUTE: 6648 case MESA_SHADER_TESS_CTRL: 6649 case MESA_SHADER_GEOMETRY: 6650 return; 6651 case MESA_SHADER_VERTEX: 6652 if (ctx->options->key.vs.as_ls || 6653 ctx->options->key.vs.as_es) 6654 return; 6655 outinfo = &ctx->shader_info->vs.outinfo; 6656 break; 6657 case MESA_SHADER_TESS_EVAL: 6658 if (ctx->options->key.vs.as_es) 6659 return; 6660 outinfo = &ctx->shader_info->tes.outinfo; 6661 break; 6662 default: 6663 unreachable("Unhandled shader type"); 6664 } 6665 6666 ac_optimize_vs_outputs(&ctx->ac, 6667 ctx->main_function, 6668 outinfo->vs_output_param_offset, 6669 VARYING_SLOT_MAX, 6670 &outinfo->param_exports); 6671 } 6672 6673 static void 6674 ac_setup_rings(struct nir_to_llvm_context *ctx) 6675 { 6676 if ((ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) || 6677 (ctx->stage == MESA_SHADER_TESS_EVAL && ctx->options->key.tes.as_es)) { 6678 ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_VS, false)); 6679 } 6680 6681 if (ctx->is_gs_copy_shader) { 6682 ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_GSVS_VS, false)); 6683 } 6684 if (ctx->stage == MESA_SHADER_GEOMETRY) { 6685 LLVMValueRef tmp; 6686 ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_GS, false)); 6687 ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_GSVS_GS, false)); 6688 6689 ctx->gsvs_ring = LLVMBuildBitCast(ctx->builder, ctx->gsvs_ring, ctx->ac.v4i32, ""); 6690 6691 ctx->gsvs_ring = LLVMBuildInsertElement(ctx->builder, ctx->gsvs_ring, ctx->gsvs_num_entries, LLVMConstInt(ctx->ac.i32, 2, false), ""); 6692 tmp = LLVMBuildExtractElement(ctx->builder, ctx->gsvs_ring, ctx->ac.i32_1, ""); 6693 tmp = LLVMBuildOr(ctx->builder, tmp, ctx->gsvs_ring_stride, ""); 6694 ctx->gsvs_ring = LLVMBuildInsertElement(ctx->builder, ctx->gsvs_ring, tmp, ctx->ac.i32_1, ""); 6695 } 6696 6697 if (ctx->stage == MESA_SHADER_TESS_CTRL || 6698 ctx->stage == MESA_SHADER_TESS_EVAL) { 6699 ctx->hs_ring_tess_offchip = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_OFFCHIP, false)); 6700 ctx->hs_ring_tess_factor = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_FACTOR, false)); 6701 } 6702 } 6703 6704 static unsigned 6705 ac_nir_get_max_workgroup_size(enum chip_class chip_class, 6706 const struct nir_shader *nir) 6707 { 6708 switch (nir->info.stage) { 6709 case MESA_SHADER_TESS_CTRL: 6710 return chip_class >= CIK ? 128 : 64; 6711 case MESA_SHADER_GEOMETRY: 6712 return chip_class >= GFX9 ? 128 : 64; 6713 case MESA_SHADER_COMPUTE: 6714 break; 6715 default: 6716 return 0; 6717 } 6718 6719 unsigned max_workgroup_size = nir->info.cs.local_size[0] * 6720 nir->info.cs.local_size[1] * 6721 nir->info.cs.local_size[2]; 6722 return max_workgroup_size; 6723 } 6724 6725 /* Fixup the HW not emitting the TCS regs if there are no HS threads. */ 6726 static void ac_nir_fixup_ls_hs_input_vgprs(struct nir_to_llvm_context *ctx) 6727 { 6728 LLVMValueRef count = ac_build_bfe(&ctx->ac, ctx->merged_wave_info, 6729 LLVMConstInt(ctx->ac.i32, 8, false), 6730 LLVMConstInt(ctx->ac.i32, 8, false), false); 6731 LLVMValueRef hs_empty = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, count, 6732 ctx->ac.i32_0, ""); 6733 ctx->abi.instance_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->rel_auto_id, ctx->abi.instance_id, ""); 6734 ctx->vs_prim_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.vertex_id, ctx->vs_prim_id, ""); 6735 ctx->rel_auto_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_rel_ids, ctx->rel_auto_id, ""); 6736 ctx->abi.vertex_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_patch_id, ctx->abi.vertex_id, ""); 6737 } 6738 6739 static void prepare_gs_input_vgprs(struct nir_to_llvm_context *ctx) 6740 { 6741 for(int i = 5; i >= 0; --i) { 6742 ctx->gs_vtx_offset[i] = ac_build_bfe(&ctx->ac, ctx->gs_vtx_offset[i & ~1], 6743 LLVMConstInt(ctx->ac.i32, (i & 1) * 16, false), 6744 LLVMConstInt(ctx->ac.i32, 16, false), false); 6745 } 6746 6747 ctx->gs_wave_id = ac_build_bfe(&ctx->ac, ctx->merged_wave_info, 6748 LLVMConstInt(ctx->ac.i32, 16, false), 6749 LLVMConstInt(ctx->ac.i32, 8, false), false); 6750 } 6751 6752 void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, 6753 struct nir_shader *nir, struct nir_to_llvm_context *nctx) 6754 { 6755 struct ac_nir_context ctx = {}; 6756 struct nir_function *func; 6757 6758 ctx.ac = *ac; 6759 ctx.abi = abi; 6760 6761 ctx.nctx = nctx; 6762 if (nctx) 6763 nctx->nir = &ctx; 6764 6765 ctx.stage = nir->info.stage; 6766 6767 ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder)); 6768 6769 nir_foreach_variable(variable, &nir->outputs) 6770 handle_shader_output_decl(&ctx, nir, variable); 6771 6772 ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, 6773 _mesa_key_pointer_equal); 6774 ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer, 6775 _mesa_key_pointer_equal); 6776 ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer, 6777 _mesa_key_pointer_equal); 6778 6779 func = (struct nir_function *)exec_list_get_head(&nir->functions); 6780 6781 setup_locals(&ctx, func); 6782 6783 if (nir->info.stage == MESA_SHADER_COMPUTE) 6784 setup_shared(&ctx, nir); 6785 6786 visit_cf_list(&ctx, &func->impl->body); 6787 phi_post_pass(&ctx); 6788 6789 ctx.abi->emit_outputs(ctx.abi, RADEON_LLVM_MAX_OUTPUTS, 6790 ctx.outputs); 6791 6792 free(ctx.locals); 6793 ralloc_free(ctx.defs); 6794 ralloc_free(ctx.phis); 6795 ralloc_free(ctx.vars); 6796 6797 if (nctx) 6798 nctx->nir = NULL; 6799 } 6800 6801 static 6802 LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, 6803 struct nir_shader *const *shaders, 6804 int shader_count, 6805 struct ac_shader_variant_info *shader_info, 6806 const struct ac_nir_compiler_options *options) 6807 { 6808 struct nir_to_llvm_context ctx = {0}; 6809 unsigned i; 6810 ctx.options = options; 6811 ctx.shader_info = shader_info; 6812 ctx.context = LLVMContextCreate(); 6813 ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context); 6814 6815 ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class, 6816 options->family); 6817 ctx.ac.module = ctx.module; 6818 LLVMSetTarget(ctx.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--"); 6819 6820 LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm); 6821 char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout); 6822 LLVMSetDataLayout(ctx.module, data_layout_str); 6823 LLVMDisposeTargetData(data_layout); 6824 LLVMDisposeMessage(data_layout_str); 6825 6826 enum ac_float_mode float_mode = 6827 options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH : 6828 AC_FLOAT_MODE_DEFAULT; 6829 6830 ctx.builder = ac_create_builder(ctx.context, float_mode); 6831 ctx.ac.builder = ctx.builder; 6832 6833 memset(shader_info, 0, sizeof(*shader_info)); 6834 6835 for(int i = 0; i < shader_count; ++i) 6836 ac_nir_shader_info_pass(shaders[i], options, &shader_info->info); 6837 6838 for (i = 0; i < AC_UD_MAX_SETS; i++) 6839 shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1; 6840 for (i = 0; i < AC_UD_MAX_UD; i++) 6841 shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1; 6842 6843 ctx.max_workgroup_size = 0; 6844 for (int i = 0; i < shader_count; ++i) { 6845 ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size, 6846 ac_nir_get_max_workgroup_size(ctx.options->chip_class, 6847 shaders[i])); 6848 } 6849 6850 create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2, 6851 shader_count >= 2 ? shaders[shader_count - 2]->info.stage : MESA_SHADER_VERTEX); 6852 6853 ctx.abi.inputs = &ctx.inputs[0]; 6854 ctx.abi.emit_outputs = handle_shader_outputs_post; 6855 ctx.abi.emit_vertex = visit_emit_vertex; 6856 ctx.abi.load_ubo = radv_load_ubo; 6857 ctx.abi.load_ssbo = radv_load_ssbo; 6858 ctx.abi.load_sampler_desc = radv_get_sampler_desc; 6859 ctx.abi.clamp_shadow_reference = false; 6860 ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9; 6861 6862 if (shader_count >= 2) 6863 ac_init_exec_full_mask(&ctx.ac); 6864 6865 if (ctx.ac.chip_class == GFX9 && 6866 shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL) 6867 ac_nir_fixup_ls_hs_input_vgprs(&ctx); 6868 6869 for(int i = 0; i < shader_count; ++i) { 6870 ctx.stage = shaders[i]->info.stage; 6871 ctx.output_mask = 0; 6872 ctx.tess_outputs_written = 0; 6873 ctx.num_output_clips = shaders[i]->info.clip_distance_array_size; 6874 ctx.num_output_culls = shaders[i]->info.cull_distance_array_size; 6875 6876 if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) { 6877 ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, ctx.ac.i32, "gs_next_vertex"); 6878 ctx.gs_max_out_vertices = shaders[i]->info.gs.vertices_out; 6879 ctx.abi.load_inputs = load_gs_input; 6880 ctx.abi.emit_primitive = visit_end_primitive; 6881 } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) { 6882 ctx.tcs_outputs_read = shaders[i]->info.outputs_read; 6883 ctx.tcs_patch_outputs_read = shaders[i]->info.patch_outputs_read; 6884 ctx.abi.load_tess_varyings = load_tcs_varyings; 6885 ctx.abi.load_patch_vertices_in = load_patch_vertices_in; 6886 ctx.abi.store_tcs_outputs = store_tcs_output; 6887 } else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) { 6888 ctx.tes_primitive_mode = shaders[i]->info.tess.primitive_mode; 6889 ctx.abi.load_tess_varyings = load_tes_input; 6890 ctx.abi.load_tess_coord = load_tess_coord; 6891 ctx.abi.load_patch_vertices_in = load_patch_vertices_in; 6892 } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) { 6893 if (shader_info->info.vs.needs_instance_id) { 6894 if (ctx.options->key.vs.as_ls) { 6895 ctx.shader_info->vs.vgpr_comp_cnt = 6896 MAX2(2, ctx.shader_info->vs.vgpr_comp_cnt); 6897 } else { 6898 ctx.shader_info->vs.vgpr_comp_cnt = 6899 MAX2(1, ctx.shader_info->vs.vgpr_comp_cnt); 6900 } 6901 } 6902 } else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) { 6903 shader_info->fs.can_discard = shaders[i]->info.fs.uses_discard; 6904 } 6905 6906 if (i) 6907 emit_barrier(&ctx.ac, ctx.stage); 6908 6909 ac_setup_rings(&ctx); 6910 6911 LLVMBasicBlockRef merge_block; 6912 if (shader_count >= 2) { 6913 LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder)); 6914 LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, ""); 6915 merge_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, ""); 6916 6917 LLVMValueRef count = ac_build_bfe(&ctx.ac, ctx.merged_wave_info, 6918 LLVMConstInt(ctx.ac.i32, 8 * i, false), 6919 LLVMConstInt(ctx.ac.i32, 8, false), false); 6920 LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac); 6921 LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT, 6922 thread_id, count, ""); 6923 LLVMBuildCondBr(ctx.ac.builder, cond, then_block, merge_block); 6924 6925 LLVMPositionBuilderAtEnd(ctx.ac.builder, then_block); 6926 } 6927 6928 if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) 6929 handle_fs_inputs(&ctx, shaders[i]); 6930 else if(shaders[i]->info.stage == MESA_SHADER_VERTEX) 6931 handle_vs_inputs(&ctx, shaders[i]); 6932 else if(shader_count >= 2 && shaders[i]->info.stage == MESA_SHADER_GEOMETRY) 6933 prepare_gs_input_vgprs(&ctx); 6934 6935 nir_foreach_variable(variable, &shaders[i]->outputs) 6936 scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage); 6937 6938 ac_nir_translate(&ctx.ac, &ctx.abi, shaders[i], &ctx); 6939 6940 if (shader_count >= 2) { 6941 LLVMBuildBr(ctx.ac.builder, merge_block); 6942 LLVMPositionBuilderAtEnd(ctx.ac.builder, merge_block); 6943 } 6944 6945 if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) { 6946 unsigned addclip = shaders[i]->info.clip_distance_array_size + 6947 shaders[i]->info.cull_distance_array_size > 4; 6948 shader_info->gs.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16; 6949 shader_info->gs.max_gsvs_emit_size = shader_info->gs.gsvs_vertex_size * 6950 shaders[i]->info.gs.vertices_out; 6951 } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) { 6952 shader_info->tcs.outputs_written = ctx.tess_outputs_written; 6953 shader_info->tcs.patch_outputs_written = ctx.tess_patch_outputs_written; 6954 } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX && ctx.options->key.vs.as_ls) { 6955 shader_info->vs.outputs_written = ctx.tess_outputs_written; 6956 } 6957 } 6958 6959 LLVMBuildRetVoid(ctx.builder); 6960 6961 if (options->dump_preoptir) 6962 ac_dump_module(ctx.module); 6963 6964 ac_llvm_finalize_module(&ctx); 6965 6966 if (shader_count == 1) 6967 ac_nir_eliminate_const_vs_outputs(&ctx); 6968 6969 return ctx.module; 6970 } 6971 6972 static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context) 6973 { 6974 unsigned *retval = (unsigned *)context; 6975 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di); 6976 char *description = LLVMGetDiagInfoDescription(di); 6977 6978 if (severity == LLVMDSError) { 6979 *retval = 1; 6980 fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n", 6981 description); 6982 } 6983 6984 LLVMDisposeMessage(description); 6985 } 6986 6987 static unsigned ac_llvm_compile(LLVMModuleRef M, 6988 struct ac_shader_binary *binary, 6989 LLVMTargetMachineRef tm) 6990 { 6991 unsigned retval = 0; 6992 char *err; 6993 LLVMContextRef llvm_ctx; 6994 LLVMMemoryBufferRef out_buffer; 6995 unsigned buffer_size; 6996 const char *buffer_data; 6997 LLVMBool mem_err; 6998 6999 /* Setup Diagnostic Handler*/ 7000 llvm_ctx = LLVMGetModuleContext(M); 7001 7002 LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler, 7003 &retval); 7004 7005 /* Compile IR*/ 7006 mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, 7007 &err, &out_buffer); 7008 7009 /* Process Errors/Warnings */ 7010 if (mem_err) { 7011 fprintf(stderr, "%s: %s", __FUNCTION__, err); 7012 free(err); 7013 retval = 1; 7014 goto out; 7015 } 7016 7017 /* Extract Shader Code*/ 7018 buffer_size = LLVMGetBufferSize(out_buffer); 7019 buffer_data = LLVMGetBufferStart(out_buffer); 7020 7021 ac_elf_read(buffer_data, buffer_size, binary); 7022 7023 /* Clean up */ 7024 LLVMDisposeMemoryBuffer(out_buffer); 7025 7026 out: 7027 return retval; 7028 } 7029 7030 static void ac_compile_llvm_module(LLVMTargetMachineRef tm, 7031 LLVMModuleRef llvm_module, 7032 struct ac_shader_binary *binary, 7033 struct ac_shader_config *config, 7034 struct ac_shader_variant_info *shader_info, 7035 gl_shader_stage stage, 7036 bool dump_shader, bool supports_spill) 7037 { 7038 if (dump_shader) 7039 ac_dump_module(llvm_module); 7040 7041 memset(binary, 0, sizeof(*binary)); 7042 int v = ac_llvm_compile(llvm_module, binary, tm); 7043 if (v) { 7044 fprintf(stderr, "compile failed\n"); 7045 } 7046 7047 if (dump_shader) 7048 fprintf(stderr, "disasm:\n%s\n", binary->disasm_string); 7049 7050 ac_shader_binary_read_config(binary, config, 0, supports_spill); 7051 7052 LLVMContextRef ctx = LLVMGetModuleContext(llvm_module); 7053 LLVMDisposeModule(llvm_module); 7054 LLVMContextDispose(ctx); 7055 7056 if (stage == MESA_SHADER_FRAGMENT) { 7057 shader_info->num_input_vgprs = 0; 7058 if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr)) 7059 shader_info->num_input_vgprs += 2; 7060 if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr)) 7061 shader_info->num_input_vgprs += 2; 7062 if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr)) 7063 shader_info->num_input_vgprs += 2; 7064 if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr)) 7065 shader_info->num_input_vgprs += 3; 7066 if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr)) 7067 shader_info->num_input_vgprs += 2; 7068 if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr)) 7069 shader_info->num_input_vgprs += 2; 7070 if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr)) 7071 shader_info->num_input_vgprs += 2; 7072 if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr)) 7073 shader_info->num_input_vgprs += 1; 7074 if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr)) 7075 shader_info->num_input_vgprs += 1; 7076 if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr)) 7077 shader_info->num_input_vgprs += 1; 7078 if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr)) 7079 shader_info->num_input_vgprs += 1; 7080 if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr)) 7081 shader_info->num_input_vgprs += 1; 7082 if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr)) 7083 shader_info->num_input_vgprs += 1; 7084 if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr)) 7085 shader_info->num_input_vgprs += 1; 7086 if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr)) 7087 shader_info->num_input_vgprs += 1; 7088 if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr)) 7089 shader_info->num_input_vgprs += 1; 7090 } 7091 config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs); 7092 7093 /* +3 for scratch wave offset and VCC */ 7094 config->num_sgprs = MAX2(config->num_sgprs, 7095 shader_info->num_input_sgprs + 3); 7096 7097 /* Enable 64-bit and 16-bit denormals, because there is no performance 7098 * cost. 7099 * 7100 * If denormals are enabled, all floating-point output modifiers are 7101 * ignored. 7102 * 7103 * Don't enable denormals for 32-bit floats, because: 7104 * - Floating-point output modifiers would be ignored by the hw. 7105 * - Some opcodes don't support denormals, such as v_mad_f32. We would 7106 * have to stop using those. 7107 * - SI & CI would be very slow. 7108 */ 7109 config->float_mode |= V_00B028_FP_64_DENORMS; 7110 } 7111 7112 static void 7113 ac_fill_shader_info(struct ac_shader_variant_info *shader_info, struct nir_shader *nir, const struct ac_nir_compiler_options *options) 7114 { 7115 switch (nir->info.stage) { 7116 case MESA_SHADER_COMPUTE: 7117 for (int i = 0; i < 3; ++i) 7118 shader_info->cs.block_size[i] = nir->info.cs.local_size[i]; 7119 break; 7120 case MESA_SHADER_FRAGMENT: 7121 shader_info->fs.early_fragment_test = nir->info.fs.early_fragment_tests; 7122 break; 7123 case MESA_SHADER_GEOMETRY: 7124 shader_info->gs.vertices_in = nir->info.gs.vertices_in; 7125 shader_info->gs.vertices_out = nir->info.gs.vertices_out; 7126 shader_info->gs.output_prim = nir->info.gs.output_primitive; 7127 shader_info->gs.invocations = nir->info.gs.invocations; 7128 break; 7129 case MESA_SHADER_TESS_EVAL: 7130 shader_info->tes.primitive_mode = nir->info.tess.primitive_mode; 7131 shader_info->tes.spacing = nir->info.tess.spacing; 7132 shader_info->tes.ccw = nir->info.tess.ccw; 7133 shader_info->tes.point_mode = nir->info.tess.point_mode; 7134 shader_info->tes.as_es = options->key.tes.as_es; 7135 break; 7136 case MESA_SHADER_TESS_CTRL: 7137 shader_info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out; 7138 break; 7139 case MESA_SHADER_VERTEX: 7140 shader_info->vs.as_es = options->key.vs.as_es; 7141 shader_info->vs.as_ls = options->key.vs.as_ls; 7142 /* in LS mode we need at least 1, invocation id needs 2, handled elsewhere */ 7143 if (options->key.vs.as_ls) 7144 shader_info->vs.vgpr_comp_cnt = MAX2(1, shader_info->vs.vgpr_comp_cnt); 7145 break; 7146 default: 7147 break; 7148 } 7149 } 7150 7151 void ac_compile_nir_shader(LLVMTargetMachineRef tm, 7152 struct ac_shader_binary *binary, 7153 struct ac_shader_config *config, 7154 struct ac_shader_variant_info *shader_info, 7155 struct nir_shader *const *nir, 7156 int nir_count, 7157 const struct ac_nir_compiler_options *options, 7158 bool dump_shader) 7159 { 7160 7161 LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, nir_count, shader_info, 7162 options); 7163 7164 ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir[0]->info.stage, dump_shader, options->supports_spill); 7165 for (int i = 0; i < nir_count; ++i) 7166 ac_fill_shader_info(shader_info, nir[i], options); 7167 7168 /* Determine the ES type (VS or TES) for the GS on GFX9. */ 7169 if (options->chip_class == GFX9) { 7170 if (nir_count == 2 && 7171 nir[1]->info.stage == MESA_SHADER_GEOMETRY) { 7172 shader_info->gs.es_type = nir[0]->info.stage; 7173 } 7174 } 7175 } 7176 7177 static void 7178 ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx) 7179 { 7180 LLVMValueRef args[9]; 7181 args[0] = ctx->gsvs_ring; 7182 args[1] = LLVMBuildMul(ctx->builder, ctx->abi.vertex_id, LLVMConstInt(ctx->ac.i32, 4, false), ""); 7183 args[3] = ctx->ac.i32_0; 7184 args[4] = ctx->ac.i32_1; /* OFFEN */ 7185 args[5] = ctx->ac.i32_0; /* IDXEN */ 7186 args[6] = ctx->ac.i32_1; /* GLC */ 7187 args[7] = ctx->ac.i32_1; /* SLC */ 7188 args[8] = ctx->ac.i32_0; /* TFE */ 7189 7190 int idx = 0; 7191 7192 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { 7193 int length = 4; 7194 int slot = idx; 7195 int slot_inc = 1; 7196 if (!(ctx->output_mask & (1ull << i))) 7197 continue; 7198 7199 if (i == VARYING_SLOT_CLIP_DIST0) { 7200 /* unpack clip and cull from a single set of slots */ 7201 length = ctx->num_output_clips + ctx->num_output_culls; 7202 if (length > 4) 7203 slot_inc = 2; 7204 } 7205 7206 for (unsigned j = 0; j < length; j++) { 7207 LLVMValueRef value; 7208 args[2] = LLVMConstInt(ctx->ac.i32, 7209 (slot * 4 + j) * 7210 ctx->gs_max_out_vertices * 16 * 4, false); 7211 7212 value = ac_build_intrinsic(&ctx->ac, 7213 "llvm.SI.buffer.load.dword.i32.i32", 7214 ctx->ac.i32, args, 9, 7215 AC_FUNC_ATTR_READONLY | 7216 AC_FUNC_ATTR_LEGACY); 7217 7218 LLVMBuildStore(ctx->builder, 7219 ac_to_float(&ctx->ac, value), ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)]); 7220 } 7221 idx += slot_inc; 7222 } 7223 handle_vs_outputs_post(ctx, false, &ctx->shader_info->vs.outinfo); 7224 } 7225 7226 void ac_create_gs_copy_shader(LLVMTargetMachineRef tm, 7227 struct nir_shader *geom_shader, 7228 struct ac_shader_binary *binary, 7229 struct ac_shader_config *config, 7230 struct ac_shader_variant_info *shader_info, 7231 const struct ac_nir_compiler_options *options, 7232 bool dump_shader) 7233 { 7234 struct nir_to_llvm_context ctx = {0}; 7235 ctx.context = LLVMContextCreate(); 7236 ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context); 7237 ctx.options = options; 7238 ctx.shader_info = shader_info; 7239 7240 ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class, 7241 options->family); 7242 ctx.ac.module = ctx.module; 7243 7244 ctx.is_gs_copy_shader = true; 7245 LLVMSetTarget(ctx.module, "amdgcn--"); 7246 7247 enum ac_float_mode float_mode = 7248 options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH : 7249 AC_FLOAT_MODE_DEFAULT; 7250 7251 ctx.builder = ac_create_builder(ctx.context, float_mode); 7252 ctx.ac.builder = ctx.builder; 7253 ctx.stage = MESA_SHADER_VERTEX; 7254 7255 create_function(&ctx, MESA_SHADER_VERTEX, false, MESA_SHADER_VERTEX); 7256 7257 ctx.gs_max_out_vertices = geom_shader->info.gs.vertices_out; 7258 ac_setup_rings(&ctx); 7259 7260 ctx.num_output_clips = geom_shader->info.clip_distance_array_size; 7261 ctx.num_output_culls = geom_shader->info.cull_distance_array_size; 7262 7263 struct ac_nir_context nir_ctx = {}; 7264 nir_ctx.ac = ctx.ac; 7265 nir_ctx.abi = &ctx.abi; 7266 7267 nir_ctx.nctx = &ctx; 7268 ctx.nir = &nir_ctx; 7269 7270 nir_foreach_variable(variable, &geom_shader->outputs) { 7271 scan_shader_output_decl(&ctx, variable, geom_shader, MESA_SHADER_VERTEX); 7272 handle_shader_output_decl(&nir_ctx, geom_shader, variable); 7273 } 7274 7275 ac_gs_copy_shader_emit(&ctx); 7276 7277 ctx.nir = NULL; 7278 7279 LLVMBuildRetVoid(ctx.builder); 7280 7281 ac_llvm_finalize_module(&ctx); 7282 7283 ac_compile_llvm_module(tm, ctx.module, binary, config, shader_info, 7284 MESA_SHADER_VERTEX, 7285 dump_shader, options->supports_spill); 7286 } 7287