1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sub license, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 18 * USE OR OTHER DEALINGS IN THE SOFTWARE. 19 * 20 * The above copyright notice and this permission notice (including the 21 * next paragraph) shall be included in all copies or substantial portions 22 * of the Software. 23 * 24 */ 25 /* based on pieces from si_pipe.c and radeon_llvm_emit.c */ 26 #include "ac_llvm_build.h" 27 28 #include <llvm-c/Core.h> 29 30 #include "c11/threads.h" 31 32 #include <assert.h> 33 #include <stdio.h> 34 35 #include "ac_llvm_util.h" 36 #include "ac_exp_param.h" 37 #include "util/bitscan.h" 38 #include "util/macros.h" 39 #include "util/u_atomic.h" 40 #include "sid.h" 41 42 #include "shader_enums.h" 43 44 #define AC_LLVM_INITIAL_CF_DEPTH 4 45 46 /* Data for if/else/endif and bgnloop/endloop control flow structures. 47 */ 48 struct ac_llvm_flow { 49 /* Loop exit or next part of if/else/endif. */ 50 LLVMBasicBlockRef next_block; 51 LLVMBasicBlockRef loop_entry_block; 52 }; 53 54 /* Initialize module-independent parts of the context. 
55 * 56 * The caller is responsible for initializing ctx::module and ctx::builder. 57 */ 58 void 59 ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context, 60 enum chip_class chip_class, enum radeon_family family) 61 { 62 LLVMValueRef args[1]; 63 64 ctx->chip_class = chip_class; 65 ctx->family = family; 66 67 ctx->context = context; 68 ctx->module = NULL; 69 ctx->builder = NULL; 70 71 ctx->voidt = LLVMVoidTypeInContext(ctx->context); 72 ctx->i1 = LLVMInt1TypeInContext(ctx->context); 73 ctx->i8 = LLVMInt8TypeInContext(ctx->context); 74 ctx->i16 = LLVMIntTypeInContext(ctx->context, 16); 75 ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); 76 ctx->i64 = LLVMIntTypeInContext(ctx->context, 64); 77 ctx->f16 = LLVMHalfTypeInContext(ctx->context); 78 ctx->f32 = LLVMFloatTypeInContext(ctx->context); 79 ctx->f64 = LLVMDoubleTypeInContext(ctx->context); 80 ctx->v2i32 = LLVMVectorType(ctx->i32, 2); 81 ctx->v3i32 = LLVMVectorType(ctx->i32, 3); 82 ctx->v4i32 = LLVMVectorType(ctx->i32, 4); 83 ctx->v2f32 = LLVMVectorType(ctx->f32, 2); 84 ctx->v4f32 = LLVMVectorType(ctx->f32, 4); 85 ctx->v8i32 = LLVMVectorType(ctx->i32, 8); 86 87 ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false); 88 ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false); 89 ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false); 90 ctx->i64_1 = LLVMConstInt(ctx->i64, 1, false); 91 ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0); 92 ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0); 93 ctx->f64_0 = LLVMConstReal(ctx->f64, 0.0); 94 ctx->f64_1 = LLVMConstReal(ctx->f64, 1.0); 95 96 ctx->i1false = LLVMConstInt(ctx->i1, 0, false); 97 ctx->i1true = LLVMConstInt(ctx->i1, 1, false); 98 99 ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context, 100 "range", 5); 101 102 ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context, 103 "invariant.load", 14); 104 105 ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6); 106 107 args[0] = LLVMConstReal(ctx->f32, 2.5); 108 ctx->fpmath_md_2p5_ulp = 
LLVMMDNodeInContext(ctx->context, args, 1); 109 110 ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context, 111 "amdgpu.uniform", 14); 112 113 ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0); 114 } 115 116 void 117 ac_llvm_context_dispose(struct ac_llvm_context *ctx) 118 { 119 free(ctx->flow); 120 ctx->flow = NULL; 121 ctx->flow_depth_max = 0; 122 } 123 124 int 125 ac_get_llvm_num_components(LLVMValueRef value) 126 { 127 LLVMTypeRef type = LLVMTypeOf(value); 128 unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind 129 ? LLVMGetVectorSize(type) 130 : 1; 131 return num_components; 132 } 133 134 LLVMValueRef 135 ac_llvm_extract_elem(struct ac_llvm_context *ac, 136 LLVMValueRef value, 137 int index) 138 { 139 if (LLVMGetTypeKind(LLVMTypeOf(value)) != LLVMVectorTypeKind) { 140 assert(index == 0); 141 return value; 142 } 143 144 return LLVMBuildExtractElement(ac->builder, value, 145 LLVMConstInt(ac->i32, index, false), ""); 146 } 147 148 unsigned 149 ac_get_type_size(LLVMTypeRef type) 150 { 151 LLVMTypeKind kind = LLVMGetTypeKind(type); 152 153 switch (kind) { 154 case LLVMIntegerTypeKind: 155 return LLVMGetIntTypeWidth(type) / 8; 156 case LLVMFloatTypeKind: 157 return 4; 158 case LLVMDoubleTypeKind: 159 case LLVMPointerTypeKind: 160 return 8; 161 case LLVMVectorTypeKind: 162 return LLVMGetVectorSize(type) * 163 ac_get_type_size(LLVMGetElementType(type)); 164 case LLVMArrayTypeKind: 165 return LLVMGetArrayLength(type) * 166 ac_get_type_size(LLVMGetElementType(type)); 167 default: 168 assert(0); 169 return 0; 170 } 171 } 172 173 static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t) 174 { 175 if (t == ctx->f16 || t == ctx->i16) 176 return ctx->i16; 177 else if (t == ctx->f32 || t == ctx->i32) 178 return ctx->i32; 179 else if (t == ctx->f64 || t == ctx->i64) 180 return ctx->i64; 181 else 182 unreachable("Unhandled integer size"); 183 } 184 185 LLVMTypeRef 186 ac_to_integer_type(struct ac_llvm_context *ctx, 
LLVMTypeRef t) 187 { 188 if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) { 189 LLVMTypeRef elem_type = LLVMGetElementType(t); 190 return LLVMVectorType(to_integer_type_scalar(ctx, elem_type), 191 LLVMGetVectorSize(t)); 192 } 193 return to_integer_type_scalar(ctx, t); 194 } 195 196 LLVMValueRef 197 ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v) 198 { 199 LLVMTypeRef type = LLVMTypeOf(v); 200 return LLVMBuildBitCast(ctx->builder, v, ac_to_integer_type(ctx, type), ""); 201 } 202 203 static LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t) 204 { 205 if (t == ctx->i16 || t == ctx->f16) 206 return ctx->f16; 207 else if (t == ctx->i32 || t == ctx->f32) 208 return ctx->f32; 209 else if (t == ctx->i64 || t == ctx->f64) 210 return ctx->f64; 211 else 212 unreachable("Unhandled float size"); 213 } 214 215 LLVMTypeRef 216 ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t) 217 { 218 if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) { 219 LLVMTypeRef elem_type = LLVMGetElementType(t); 220 return LLVMVectorType(to_float_type_scalar(ctx, elem_type), 221 LLVMGetVectorSize(t)); 222 } 223 return to_float_type_scalar(ctx, t); 224 } 225 226 LLVMValueRef 227 ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v) 228 { 229 LLVMTypeRef type = LLVMTypeOf(v); 230 return LLVMBuildBitCast(ctx->builder, v, ac_to_float_type(ctx, type), ""); 231 } 232 233 234 LLVMValueRef 235 ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name, 236 LLVMTypeRef return_type, LLVMValueRef *params, 237 unsigned param_count, unsigned attrib_mask) 238 { 239 LLVMValueRef function, call; 240 bool set_callsite_attrs = HAVE_LLVM >= 0x0400 && 241 !(attrib_mask & AC_FUNC_ATTR_LEGACY); 242 243 function = LLVMGetNamedFunction(ctx->module, name); 244 if (!function) { 245 LLVMTypeRef param_types[32], function_type; 246 unsigned i; 247 248 assert(param_count <= 32); 249 250 for (i = 0; i < param_count; ++i) { 251 assert(params[i]); 252 param_types[i] = 
LLVMTypeOf(params[i]); 253 } 254 function_type = 255 LLVMFunctionType(return_type, param_types, param_count, 0); 256 function = LLVMAddFunction(ctx->module, name, function_type); 257 258 LLVMSetFunctionCallConv(function, LLVMCCallConv); 259 LLVMSetLinkage(function, LLVMExternalLinkage); 260 261 if (!set_callsite_attrs) 262 ac_add_func_attributes(ctx->context, function, attrib_mask); 263 } 264 265 call = LLVMBuildCall(ctx->builder, function, params, param_count, ""); 266 if (set_callsite_attrs) 267 ac_add_func_attributes(ctx->context, call, attrib_mask); 268 return call; 269 } 270 271 /** 272 * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with 273 * intrinsic names). 274 */ 275 void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize) 276 { 277 LLVMTypeRef elem_type = type; 278 279 assert(bufsize >= 8); 280 281 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) { 282 int ret = snprintf(buf, bufsize, "v%u", 283 LLVMGetVectorSize(type)); 284 if (ret < 0) { 285 char *type_name = LLVMPrintTypeToString(type); 286 fprintf(stderr, "Error building type name for: %s\n", 287 type_name); 288 return; 289 } 290 elem_type = LLVMGetElementType(type); 291 buf += ret; 292 bufsize -= ret; 293 } 294 switch (LLVMGetTypeKind(elem_type)) { 295 default: break; 296 case LLVMIntegerTypeKind: 297 snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type)); 298 break; 299 case LLVMFloatTypeKind: 300 snprintf(buf, bufsize, "f32"); 301 break; 302 case LLVMDoubleTypeKind: 303 snprintf(buf, bufsize, "f64"); 304 break; 305 } 306 } 307 308 /** 309 * Helper function that builds an LLVM IR PHI node and immediately adds 310 * incoming edges. 
311 */ 312 LLVMValueRef 313 ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type, 314 unsigned count_incoming, LLVMValueRef *values, 315 LLVMBasicBlockRef *blocks) 316 { 317 LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, ""); 318 LLVMAddIncoming(phi, values, blocks, count_incoming); 319 return phi; 320 } 321 322 /* Prevent optimizations (at least of memory accesses) across the current 323 * point in the program by emitting empty inline assembly that is marked as 324 * having side effects. 325 * 326 * Optionally, a value can be passed through the inline assembly to prevent 327 * LLVM from hoisting calls to ReadNone functions. 328 */ 329 void 330 ac_build_optimization_barrier(struct ac_llvm_context *ctx, 331 LLVMValueRef *pvgpr) 332 { 333 static int counter = 0; 334 335 LLVMBuilderRef builder = ctx->builder; 336 char code[16]; 337 338 snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter)); 339 340 if (!pvgpr) { 341 LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false); 342 LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false); 343 LLVMBuildCall(builder, inlineasm, NULL, 0, ""); 344 } else { 345 LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false); 346 LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "=v,0", true, false); 347 LLVMValueRef vgpr = *pvgpr; 348 LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr); 349 unsigned vgpr_size = ac_get_type_size(vgpr_type); 350 LLVMValueRef vgpr0; 351 352 assert(vgpr_size % 4 == 0); 353 354 vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), ""); 355 vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, ""); 356 vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, ""); 357 vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, ""); 358 vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, ""); 359 360 *pvgpr = vgpr; 361 } 362 } 363 364 LLVMValueRef 365 ac_build_ballot(struct ac_llvm_context *ctx, 366 LLVMValueRef value) 367 { 
368 LLVMValueRef args[3] = { 369 value, 370 ctx->i32_0, 371 LLVMConstInt(ctx->i32, LLVMIntNE, 0) 372 }; 373 374 /* We currently have no other way to prevent LLVM from lifting the icmp 375 * calls to a dominating basic block. 376 */ 377 ac_build_optimization_barrier(ctx, &args[0]); 378 379 if (LLVMTypeOf(args[0]) != ctx->i32) 380 args[0] = LLVMBuildBitCast(ctx->builder, args[0], ctx->i32, ""); 381 382 return ac_build_intrinsic(ctx, 383 "llvm.amdgcn.icmp.i32", 384 ctx->i64, args, 3, 385 AC_FUNC_ATTR_NOUNWIND | 386 AC_FUNC_ATTR_READNONE | 387 AC_FUNC_ATTR_CONVERGENT); 388 } 389 390 LLVMValueRef 391 ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value) 392 { 393 LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1); 394 LLVMValueRef vote_set = ac_build_ballot(ctx, value); 395 return LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, active_set, ""); 396 } 397 398 LLVMValueRef 399 ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value) 400 { 401 LLVMValueRef vote_set = ac_build_ballot(ctx, value); 402 return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set, 403 LLVMConstInt(ctx->i64, 0, 0), ""); 404 } 405 406 LLVMValueRef 407 ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value) 408 { 409 LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1); 410 LLVMValueRef vote_set = ac_build_ballot(ctx, value); 411 412 LLVMValueRef all = LLVMBuildICmp(ctx->builder, LLVMIntEQ, 413 vote_set, active_set, ""); 414 LLVMValueRef none = LLVMBuildICmp(ctx->builder, LLVMIntEQ, 415 vote_set, 416 LLVMConstInt(ctx->i64, 0, 0), ""); 417 return LLVMBuildOr(ctx->builder, all, none, ""); 418 } 419 420 LLVMValueRef 421 ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values, 422 unsigned value_count, unsigned component) 423 { 424 LLVMValueRef vec = NULL; 425 426 if (value_count == 1) { 427 return values[component]; 428 } else if (!value_count) 429 unreachable("value_count is 0"); 430 431 for (unsigned i = component; i < value_count 
+ component; i++) { 432 LLVMValueRef value = values[i]; 433 434 if (i == component) 435 vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count)); 436 LLVMValueRef index = LLVMConstInt(ctx->i32, i - component, false); 437 vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, ""); 438 } 439 return vec; 440 } 441 442 LLVMValueRef 443 ac_build_gather_values_extended(struct ac_llvm_context *ctx, 444 LLVMValueRef *values, 445 unsigned value_count, 446 unsigned value_stride, 447 bool load, 448 bool always_vector) 449 { 450 LLVMBuilderRef builder = ctx->builder; 451 LLVMValueRef vec = NULL; 452 unsigned i; 453 454 if (value_count == 1 && !always_vector) { 455 if (load) 456 return LLVMBuildLoad(builder, values[0], ""); 457 return values[0]; 458 } else if (!value_count) 459 unreachable("value_count is 0"); 460 461 for (i = 0; i < value_count; i++) { 462 LLVMValueRef value = values[i * value_stride]; 463 if (load) 464 value = LLVMBuildLoad(builder, value, ""); 465 466 if (!i) 467 vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count)); 468 LLVMValueRef index = LLVMConstInt(ctx->i32, i, false); 469 vec = LLVMBuildInsertElement(builder, vec, value, index, ""); 470 } 471 return vec; 472 } 473 474 LLVMValueRef 475 ac_build_gather_values(struct ac_llvm_context *ctx, 476 LLVMValueRef *values, 477 unsigned value_count) 478 { 479 return ac_build_gather_values_extended(ctx, values, value_count, 1, false, false); 480 } 481 482 LLVMValueRef 483 ac_build_fdiv(struct ac_llvm_context *ctx, 484 LLVMValueRef num, 485 LLVMValueRef den) 486 { 487 LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, ""); 488 489 /* Use v_rcp_f32 instead of precise division. */ 490 if (!LLVMIsConstant(ret)) 491 LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp); 492 return ret; 493 } 494 495 /* Coordinates for cube map selection. 
 * sc, tc, and ma are as in Table 8.27
 * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
 * already multiplied by two. id is the cube face number.
 */
struct cube_selection_coords {
	LLVMValueRef stc[2];
	LLVMValueRef ma;
	LLVMValueRef id;
};

/* Fill \p out by calling the llvm.amdgcn.cube{tc,sc,ma,id} intrinsics on
 * the three coordinates in \p in. All four results are f32.
 */
static void
build_cube_intrinsic(struct ac_llvm_context *ctx,
		     LLVMValueRef in[3],
		     struct cube_selection_coords *out)
{
	LLVMTypeRef f32 = ctx->f32;

	out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc",
					 f32, in, 3, AC_FUNC_ATTR_READNONE);
	out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc",
					 f32, in, 3, AC_FUNC_ATTR_READNONE);
	out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema",
				     f32, in, 3, AC_FUNC_ATTR_READNONE);
	out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid",
				     f32, in, 3, AC_FUNC_ATTR_READNONE);
}

/**
 * Build a manual selection sequence for cube face sc/tc coordinates and
 * major axis vector (multiplied by 2 for consistency) for the given
 * vec3 \p coords, for the face implied by \p selcoords.
 *
 * For the major axis, we always adjust the sign to be in the direction of
 * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
 * the selcoords major axis.
 *
 * \param out_st  Receives the selected sc (index 0) and tc (index 1).
 * \param out_ma  Receives 2 * |selected major-axis component|.
 */
static void build_cube_select(struct ac_llvm_context *ctx,
			      const struct cube_selection_coords *selcoords,
			      const LLVMValueRef *coords,
			      LLVMValueRef *out_st,
			      LLVMValueRef *out_ma)
{
	LLVMBuilderRef builder = ctx->builder;
	LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
	LLVMValueRef is_ma_positive;
	LLVMValueRef sgn_ma;
	LLVMValueRef is_ma_z, is_not_ma_z;
	LLVMValueRef is_ma_y;
	LLVMValueRef is_ma_x;
	LLVMValueRef sgn;
	LLVMValueRef tmp;

	/* sgn_ma = +1.0 if selcoords->ma >= 0 (or unordered), else -1.0. */
	is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE,
		selcoords->ma, LLVMConstReal(f32, 0.0), "");
	sgn_ma = LLVMBuildSelect(builder, is_ma_positive,
		LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), "");

	/* Classify the face id: id >= 4 selects Z, 2 <= id < 4 selects Y,
	 * anything else selects X as the major axis. */
	is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), "");
	is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
	is_ma_y = LLVMBuildAnd(builder, is_not_ma_z,
		LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), "");
	is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");

	/* Select sc */
	tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], "");
	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
		LLVMBuildSelect(builder, is_ma_z, sgn_ma,
			LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
	out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");

	/* Select tc */
	tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
	sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma,
		LLVMConstReal(f32, -1.0), "");
	out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");

	/* Select ma */
	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
		LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
	tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32",
				 ctx->f32, &tmp, 1, AC_FUNC_ATTR_READNONE);
	*out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), "");
}

/* Convert cube (array) texture coordinates, and optionally derivatives, to
 * the form written back into the argument arrays.
 *
 * \param coords_arg  In: the 3D direction in [0..2], plus the array layer in
 *                    [3] when \p is_array. Out: [0..1] = face coordinates
 *                    (sc, tc scaled by 1/|ma| and shifted by 1.5),
 *                    [2] = face id, or layer * 8 + face id for arrays.
 * \param derivs_arg  May be NULL. When \p is_deriv is set and this is
 *                    non-NULL: in, two vec3 derivatives (6 values); out,
 *                    four values holding two 2D derivatives.
 * \param is_lod      When false and \p is_array, the layer is rounded with
 *                    llvm.rint and, on VI and older, clamped to >= 0.
 */
void
ac_prepare_cube_coords(struct ac_llvm_context *ctx,
		       bool is_deriv, bool is_array, bool is_lod,
		       LLVMValueRef *coords_arg,
		       LLVMValueRef *derivs_arg)
{

	LLVMBuilderRef builder = ctx->builder;
	struct cube_selection_coords selcoords;
	LLVMValueRef coords[3];
	LLVMValueRef invma;

	if (is_array && !is_lod) {
		LLVMValueRef tmp = coords_arg[3];
		tmp = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &tmp, 1, 0);

		/* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says:
		 *
		 *    "For Array forms, the array layer used will be
		 *
		 *       max(0, min(d - 1, floor(layer + 0.5)))
		 *
		 *     where d is the depth of the texture array and layer
		 *     comes from the component indicated in the tables below."
		 *
		 * Workaround for an issue where the layer is taken from a
		 * helper invocation which happens to fall on a different
		 * layer due to extrapolation.
		 *
		 * VI and earlier attempt to implement this in hardware by
		 * clamping the value of coords[2] = (8 * layer) + face.
		 * Unfortunately, this means that we end up with the wrong
		 * face when clamping occurs.
		 *
		 * Clamp the layer earlier to work around the issue.
		 */
		if (ctx->chip_class <= VI) {
			LLVMValueRef ge0;
			ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, "");
			tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, "");
		}

		coords_arg[3] = tmp;
	}

	build_cube_intrinsic(ctx, coords_arg, &selcoords);

	/* invma = 1 / |ma| */
	invma = ac_build_intrinsic(ctx, "llvm.fabs.f32",
			ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE);
	invma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma);

	for (int i = 0; i < 2; ++i)
		coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, "");

	coords[2] = selcoords.id;

	if (is_deriv && derivs_arg) {
		LLVMValueRef derivs[4];
		int axis;

		/* Convert cube derivatives to 2D derivatives. */
		for (axis = 0; axis < 2; axis++) {
			LLVMValueRef deriv_st[2];
			LLVMValueRef deriv_ma;

			/* Transform the derivative alongside the texture
			 * coordinate. Mathematically, the correct formula is
			 * as follows. Assume we're projecting onto the +Z face
			 * and denote by dx/dh the derivative of the (original)
			 * X texture coordinate with respect to horizontal
			 * window coordinates. The projection onto the +Z face
			 * plane is:
			 *
			 *   f(x,z) = x/z
			 *
			 * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
			 *            = 1/z * dx/dh - x/z * 1/z * dz/dh.
			 *
			 * This motivates the implementation below.
			 *
			 * Whether this actually gives the expected results for
			 * apps that might feed in derivatives obtained via
			 * finite differences is anyone's guess. The OpenGL spec
			 * seems awfully quiet about how textureGrad for cube
			 * maps should be handled.
			 */
			build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3],
					  deriv_st, &deriv_ma);

			deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");

			for (int i = 0; i < 2; ++i)
				derivs[axis * 2 + i] =
					LLVMBuildFSub(builder,
						LLVMBuildFMul(builder, deriv_st[i], invma, ""),
						LLVMBuildFMul(builder, deriv_ma, coords[i], ""), "");
		}

		/* Only the first four entries of derivs_arg are overwritten. */
		memcpy(derivs_arg, derivs, sizeof(derivs));
	}

	/* Shift the texture coordinate. This must be applied after the
	 * derivative calculation.
	 */
	for (int i = 0; i < 2; ++i)
		coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), "");

	if (is_array) {
		/* for cube arrays coord.z = coord.w(array_index) * 8 + face */
		/* coords_arg.w component - array_index for cube arrays */
		LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), "");
		coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], "");
	}

	/* Only the first three entries of coords_arg are overwritten. */
	memcpy(coords_arg, coords, sizeof(coords));
}


/* Build the interpolation of one attribute channel at (i, j).
 *
 * With LLVM < 4.0 this emits llvm.SI.fs.interp with i and j bitcast to i32
 * and packed into a 2-element vector. Otherwise it emits
 * llvm.amdgcn.interp.p1 on \p i followed by llvm.amdgcn.interp.p2 on \p j.
 * \p llvm_chan, \p attr_number and \p params are passed through to the
 * intrinsics unchanged. The result is f32.
 */
LLVMValueRef
ac_build_fs_interp(struct ac_llvm_context *ctx,
		   LLVMValueRef llvm_chan,
		   LLVMValueRef attr_number,
		   LLVMValueRef params,
		   LLVMValueRef i,
		   LLVMValueRef j)
{
	LLVMValueRef args[5];
	LLVMValueRef p1;

	if (HAVE_LLVM < 0x0400) {
		LLVMValueRef ij[2];
		ij[0] = LLVMBuildBitCast(ctx->builder, i, ctx->i32, "");
		ij[1] = LLVMBuildBitCast(ctx->builder, j, ctx->i32, "");

		args[0] = llvm_chan;
		args[1] = attr_number;
		args[2] = params;
		args[3] = ac_build_gather_values(ctx, ij, 2);
		return ac_build_intrinsic(ctx, "llvm.SI.fs.interp",
					  ctx->f32, args, 4,
					  AC_FUNC_ATTR_READNONE);
	}

	args[0] = i;
	args[1] = llvm_chan;
	args[2] = attr_number;
	args[3] = params;

	p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1",
				ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);

	args[0] = p1;
	args[1] = j;
	args[2] = llvm_chan;
	args[3] = attr_number;
	args[4] = params;

	return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2",
				  ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
}

/* Build a non-interpolated read of one attribute channel.
 *
 * With LLVM < 4.0 this emits llvm.SI.fs.constant, which does not take
 * \p parameter. Otherwise it emits llvm.amdgcn.interp.mov with \p parameter
 * as its first operand. The result is f32.
 */
LLVMValueRef
ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
		       LLVMValueRef parameter,
		       LLVMValueRef llvm_chan,
		       LLVMValueRef attr_number,
		       LLVMValueRef params)
{
	LLVMValueRef args[4];
	if (HAVE_LLVM < 0x0400) {
		args[0] = llvm_chan;
		args[1] = attr_number;
		args[2] = params;

		return ac_build_intrinsic(ctx,
					  "llvm.SI.fs.constant",
					  ctx->f32, args, 3,
					  AC_FUNC_ATTR_READNONE);
	}

	args[0] = parameter;
	args[1] = llvm_chan;
	args[2] = attr_number;
	args[3] = params;

	return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.mov",
				  ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
}

/* Build a GEP with the indices {0, index}, i.e. the address of element
 * \p index of the aggregate that \p base_ptr points to.
 */
LLVMValueRef
ac_build_gep0(struct ac_llvm_context *ctx,
	      LLVMValueRef base_ptr,
	      LLVMValueRef index)
{
	LLVMValueRef indices[2] = {
		LLVMConstInt(ctx->i32, 0, 0),
		index,
	};
	return LLVMBuildGEP(ctx->builder, base_ptr,
			    indices, 2, "");
}

/* Store \p value to the address computed by ac_build_gep0(base_ptr, index). */
void
ac_build_indexed_store(struct ac_llvm_context *ctx,
		       LLVMValueRef base_ptr, LLVMValueRef index,
		       LLVMValueRef value)
{
	LLVMBuildStore(ctx->builder, value,
		       ac_build_gep0(ctx, base_ptr, index));
}

/**
 * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
 * It's equivalent to doing a load from &base_ptr[index].
 *
 * \param base_ptr  Where the array starts.
 * \param index     The element index into the array.
 * \param uniform   Whether the base_ptr and index can be assumed to be
 *                  dynamically uniform (i.e.
load to an SGPR) 797 * \param invariant Whether the load is invariant (no other opcodes affect it) 798 */ 799 static LLVMValueRef 800 ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, 801 LLVMValueRef index, bool uniform, bool invariant) 802 { 803 LLVMValueRef pointer, result; 804 805 pointer = ac_build_gep0(ctx, base_ptr, index); 806 if (uniform) 807 LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md); 808 result = LLVMBuildLoad(ctx->builder, pointer, ""); 809 if (invariant) 810 LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md); 811 return result; 812 } 813 814 LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, 815 LLVMValueRef index) 816 { 817 return ac_build_load_custom(ctx, base_ptr, index, false, false); 818 } 819 820 LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx, 821 LLVMValueRef base_ptr, LLVMValueRef index) 822 { 823 return ac_build_load_custom(ctx, base_ptr, index, false, true); 824 } 825 826 LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx, 827 LLVMValueRef base_ptr, LLVMValueRef index) 828 { 829 return ac_build_load_custom(ctx, base_ptr, index, true, true); 830 } 831 832 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4. 833 * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2), 834 * or v4i32 (num_channels=3,4). 835 */ 836 void 837 ac_build_buffer_store_dword(struct ac_llvm_context *ctx, 838 LLVMValueRef rsrc, 839 LLVMValueRef vdata, 840 unsigned num_channels, 841 LLVMValueRef voffset, 842 LLVMValueRef soffset, 843 unsigned inst_offset, 844 bool glc, 845 bool slc, 846 bool writeonly_memory, 847 bool swizzle_enable_hint) 848 { 849 /* SWIZZLE_ENABLE requires that soffset isn't folded into voffset 850 * (voffset is swizzled, but soffset isn't swizzled). 851 * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter. 
852 */ 853 if (!swizzle_enable_hint) { 854 /* Split 3 channel stores, becase LLVM doesn't support 3-channel 855 * intrinsics. */ 856 if (num_channels == 3) { 857 LLVMValueRef v[3], v01; 858 859 for (int i = 0; i < 3; i++) { 860 v[i] = LLVMBuildExtractElement(ctx->builder, vdata, 861 LLVMConstInt(ctx->i32, i, 0), ""); 862 } 863 v01 = ac_build_gather_values(ctx, v, 2); 864 865 ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset, 866 soffset, inst_offset, glc, slc, 867 writeonly_memory, swizzle_enable_hint); 868 ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset, 869 soffset, inst_offset + 8, 870 glc, slc, 871 writeonly_memory, swizzle_enable_hint); 872 return; 873 } 874 875 unsigned func = CLAMP(num_channels, 1, 3) - 1; 876 static const char *types[] = {"f32", "v2f32", "v4f32"}; 877 char name[256]; 878 LLVMValueRef offset = soffset; 879 880 if (inst_offset) 881 offset = LLVMBuildAdd(ctx->builder, offset, 882 LLVMConstInt(ctx->i32, inst_offset, 0), ""); 883 if (voffset) 884 offset = LLVMBuildAdd(ctx->builder, offset, voffset, ""); 885 886 LLVMValueRef args[] = { 887 ac_to_float(ctx, vdata), 888 LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""), 889 LLVMConstInt(ctx->i32, 0, 0), 890 offset, 891 LLVMConstInt(ctx->i1, glc, 0), 892 LLVMConstInt(ctx->i1, slc, 0), 893 }; 894 895 snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s", 896 types[func]); 897 898 ac_build_intrinsic(ctx, name, ctx->voidt, 899 args, ARRAY_SIZE(args), 900 writeonly_memory ? 901 AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY : 902 AC_FUNC_ATTR_WRITEONLY); 903 return; 904 } 905 906 static unsigned dfmt[] = { 907 V_008F0C_BUF_DATA_FORMAT_32, 908 V_008F0C_BUF_DATA_FORMAT_32_32, 909 V_008F0C_BUF_DATA_FORMAT_32_32_32, 910 V_008F0C_BUF_DATA_FORMAT_32_32_32_32 911 }; 912 assert(num_channels >= 1 && num_channels <= 4); 913 914 LLVMValueRef args[] = { 915 rsrc, 916 vdata, 917 LLVMConstInt(ctx->i32, num_channels, 0), 918 voffset ? 
voffset : LLVMGetUndef(ctx->i32), 919 soffset, 920 LLVMConstInt(ctx->i32, inst_offset, 0), 921 LLVMConstInt(ctx->i32, dfmt[num_channels - 1], 0), 922 LLVMConstInt(ctx->i32, V_008F0C_BUF_NUM_FORMAT_UINT, 0), 923 LLVMConstInt(ctx->i32, voffset != NULL, 0), 924 LLVMConstInt(ctx->i32, 0, 0), /* idxen */ 925 LLVMConstInt(ctx->i32, glc, 0), 926 LLVMConstInt(ctx->i32, slc, 0), 927 LLVMConstInt(ctx->i32, 0, 0), /* tfe*/ 928 }; 929 930 /* The instruction offset field has 12 bits */ 931 assert(voffset || inst_offset < (1 << 12)); 932 933 /* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */ 934 unsigned func = CLAMP(num_channels, 1, 3) - 1; 935 const char *types[] = {"i32", "v2i32", "v4i32"}; 936 char name[256]; 937 snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]); 938 939 ac_build_intrinsic(ctx, name, ctx->voidt, 940 args, ARRAY_SIZE(args), 941 AC_FUNC_ATTR_LEGACY); 942 } 943 944 LLVMValueRef 945 ac_build_buffer_load(struct ac_llvm_context *ctx, 946 LLVMValueRef rsrc, 947 int num_channels, 948 LLVMValueRef vindex, 949 LLVMValueRef voffset, 950 LLVMValueRef soffset, 951 unsigned inst_offset, 952 unsigned glc, 953 unsigned slc, 954 bool can_speculate, 955 bool allow_smem) 956 { 957 LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0); 958 if (voffset) 959 offset = LLVMBuildAdd(ctx->builder, offset, voffset, ""); 960 if (soffset) 961 offset = LLVMBuildAdd(ctx->builder, offset, soffset, ""); 962 963 /* TODO: VI and later generations can use SMEM with GLC=1.*/ 964 if (allow_smem && !glc && !slc) { 965 assert(vindex == NULL); 966 967 LLVMValueRef result[4]; 968 969 for (int i = 0; i < num_channels; i++) { 970 if (i) { 971 offset = LLVMBuildAdd(ctx->builder, offset, 972 LLVMConstInt(ctx->i32, 4, 0), ""); 973 } 974 LLVMValueRef args[2] = {rsrc, offset}; 975 result[i] = ac_build_intrinsic(ctx, "llvm.SI.load.const.v4i32", 976 ctx->f32, args, 2, 977 AC_FUNC_ATTR_READNONE | 978 AC_FUNC_ATTR_LEGACY); 979 } 980 if 
(num_channels == 1) 981 return result[0]; 982 983 if (num_channels == 3) 984 result[num_channels++] = LLVMGetUndef(ctx->f32); 985 return ac_build_gather_values(ctx, result, num_channels); 986 } 987 988 unsigned func = CLAMP(num_channels, 1, 3) - 1; 989 990 LLVMValueRef args[] = { 991 LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""), 992 vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0), 993 offset, 994 LLVMConstInt(ctx->i1, glc, 0), 995 LLVMConstInt(ctx->i1, slc, 0) 996 }; 997 998 LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2), 999 ctx->v4f32}; 1000 const char *type_names[] = {"f32", "v2f32", "v4f32"}; 1001 char name[256]; 1002 1003 snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s", 1004 type_names[func]); 1005 1006 return ac_build_intrinsic(ctx, name, types[func], args, 1007 ARRAY_SIZE(args), 1008 ac_get_load_intr_attribs(can_speculate)); 1009 } 1010 1011 LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, 1012 LLVMValueRef rsrc, 1013 LLVMValueRef vindex, 1014 LLVMValueRef voffset, 1015 bool can_speculate) 1016 { 1017 LLVMValueRef args [] = { 1018 LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""), 1019 vindex, 1020 voffset, 1021 ctx->i1false, /* glc */ 1022 ctx->i1false, /* slc */ 1023 }; 1024 1025 return ac_build_intrinsic(ctx, 1026 "llvm.amdgcn.buffer.load.format.v4f32", 1027 ctx->v4f32, args, ARRAY_SIZE(args), 1028 ac_get_load_intr_attribs(can_speculate)); 1029 } 1030 1031 LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx, 1032 LLVMValueRef rsrc, 1033 LLVMValueRef vindex, 1034 LLVMValueRef voffset, 1035 bool can_speculate) 1036 { 1037 LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 2, 0), ""); 1038 LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 1, 0), ""); 1039 stride = LLVMBuildLShr(ctx->builder, stride, LLVMConstInt(ctx->i32, 16, 0), ""); 1040 1041 LLVMValueRef new_elem_count = 
LLVMBuildSelect(ctx->builder, 1042 LLVMBuildICmp(ctx->builder, LLVMIntUGT, elem_count, stride, ""), 1043 elem_count, stride, ""); 1044 1045 LLVMValueRef new_rsrc = LLVMBuildInsertElement(ctx->builder, rsrc, new_elem_count, 1046 LLVMConstInt(ctx->i32, 2, 0), ""); 1047 1048 return ac_build_buffer_load_format(ctx, new_rsrc, vindex, voffset, can_speculate); 1049 } 1050 1051 /** 1052 * Set range metadata on an instruction. This can only be used on load and 1053 * call instructions. If you know an instruction can only produce the values 1054 * 0, 1, 2, you would do set_range_metadata(value, 0, 3); 1055 * \p lo is the minimum value inclusive. 1056 * \p hi is the maximum value exclusive. 1057 */ 1058 static void set_range_metadata(struct ac_llvm_context *ctx, 1059 LLVMValueRef value, unsigned lo, unsigned hi) 1060 { 1061 LLVMValueRef range_md, md_args[2]; 1062 LLVMTypeRef type = LLVMTypeOf(value); 1063 LLVMContextRef context = LLVMGetTypeContext(type); 1064 1065 md_args[0] = LLVMConstInt(type, lo, false); 1066 md_args[1] = LLVMConstInt(type, hi, false); 1067 range_md = LLVMMDNodeInContext(context, md_args, 2); 1068 LLVMSetMetadata(value, ctx->range_md_kind, range_md); 1069 } 1070 1071 LLVMValueRef 1072 ac_get_thread_id(struct ac_llvm_context *ctx) 1073 { 1074 LLVMValueRef tid; 1075 1076 LLVMValueRef tid_args[2]; 1077 tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false); 1078 tid_args[1] = LLVMConstInt(ctx->i32, 0, false); 1079 tid_args[1] = ac_build_intrinsic(ctx, 1080 "llvm.amdgcn.mbcnt.lo", ctx->i32, 1081 tid_args, 2, AC_FUNC_ATTR_READNONE); 1082 1083 tid = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", 1084 ctx->i32, tid_args, 1085 2, AC_FUNC_ATTR_READNONE); 1086 set_range_metadata(ctx, tid, 0, 64); 1087 return tid; 1088 } 1089 1090 /* 1091 * SI implements derivatives using the local data store (LDS) 1092 * All writes to the LDS happen in all executing threads at 1093 * the same time. 
TID is the Thread ID for the current 1094 * thread and is a value between 0 and 63, representing 1095 * the thread's position in the wavefront. 1096 * 1097 * For the pixel shader threads are grouped into quads of four pixels. 1098 * The TIDs of the pixels of a quad are: 1099 * 1100 * +------+------+ 1101 * |4n + 0|4n + 1| 1102 * +------+------+ 1103 * |4n + 2|4n + 3| 1104 * +------+------+ 1105 * 1106 * So, masking the TID with 0xfffffffc yields the TID of the top left pixel 1107 * of the quad, masking with 0xfffffffd yields the TID of the top pixel of 1108 * the current pixel's column, and masking with 0xfffffffe yields the TID 1109 * of the left pixel of the current pixel's row. 1110 * 1111 * Adding 1 yields the TID of the pixel to the right of the left pixel, and 1112 * adding 2 yields the TID of the pixel below the top pixel. 1113 */ 1114 LLVMValueRef 1115 ac_build_ddxy(struct ac_llvm_context *ctx, 1116 uint32_t mask, 1117 int idx, 1118 LLVMValueRef val) 1119 { 1120 LLVMValueRef tl, trbl, args[2]; 1121 LLVMValueRef result; 1122 1123 if (ctx->chip_class >= VI) { 1124 LLVMValueRef thread_id, tl_tid, trbl_tid; 1125 thread_id = ac_get_thread_id(ctx); 1126 1127 tl_tid = LLVMBuildAnd(ctx->builder, thread_id, 1128 LLVMConstInt(ctx->i32, mask, false), ""); 1129 1130 trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid, 1131 LLVMConstInt(ctx->i32, idx, false), ""); 1132 1133 args[0] = LLVMBuildMul(ctx->builder, tl_tid, 1134 LLVMConstInt(ctx->i32, 4, false), ""); 1135 args[1] = val; 1136 tl = ac_build_intrinsic(ctx, 1137 "llvm.amdgcn.ds.bpermute", ctx->i32, 1138 args, 2, 1139 AC_FUNC_ATTR_READNONE | 1140 AC_FUNC_ATTR_CONVERGENT); 1141 1142 args[0] = LLVMBuildMul(ctx->builder, trbl_tid, 1143 LLVMConstInt(ctx->i32, 4, false), ""); 1144 trbl = ac_build_intrinsic(ctx, 1145 "llvm.amdgcn.ds.bpermute", ctx->i32, 1146 args, 2, 1147 AC_FUNC_ATTR_READNONE | 1148 AC_FUNC_ATTR_CONVERGENT); 1149 } else { 1150 uint32_t masks[2] = {}; 1151 1152 switch (mask) { 1153 case AC_TID_MASK_TOP_LEFT: 
1154 masks[0] = 0x8000; 1155 if (idx == 1) 1156 masks[1] = 0x8055; 1157 else 1158 masks[1] = 0x80aa; 1159 1160 break; 1161 case AC_TID_MASK_TOP: 1162 masks[0] = 0x8044; 1163 masks[1] = 0x80ee; 1164 break; 1165 case AC_TID_MASK_LEFT: 1166 masks[0] = 0x80a0; 1167 masks[1] = 0x80f5; 1168 break; 1169 default: 1170 assert(0); 1171 } 1172 1173 args[0] = val; 1174 args[1] = LLVMConstInt(ctx->i32, masks[0], false); 1175 1176 tl = ac_build_intrinsic(ctx, 1177 "llvm.amdgcn.ds.swizzle", ctx->i32, 1178 args, 2, 1179 AC_FUNC_ATTR_READNONE | 1180 AC_FUNC_ATTR_CONVERGENT); 1181 1182 args[1] = LLVMConstInt(ctx->i32, masks[1], false); 1183 trbl = ac_build_intrinsic(ctx, 1184 "llvm.amdgcn.ds.swizzle", ctx->i32, 1185 args, 2, 1186 AC_FUNC_ATTR_READNONE | 1187 AC_FUNC_ATTR_CONVERGENT); 1188 } 1189 1190 tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, ""); 1191 trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, ""); 1192 result = LLVMBuildFSub(ctx->builder, trbl, tl, ""); 1193 return result; 1194 } 1195 1196 void 1197 ac_build_sendmsg(struct ac_llvm_context *ctx, 1198 uint32_t msg, 1199 LLVMValueRef wave_id) 1200 { 1201 LLVMValueRef args[2]; 1202 const char *intr_name = (HAVE_LLVM < 0x0400) ? "llvm.SI.sendmsg" : "llvm.amdgcn.s.sendmsg"; 1203 args[0] = LLVMConstInt(ctx->i32, msg, false); 1204 args[1] = wave_id; 1205 ac_build_intrinsic(ctx, intr_name, ctx->voidt, args, 2, 0); 1206 } 1207 1208 LLVMValueRef 1209 ac_build_imsb(struct ac_llvm_context *ctx, 1210 LLVMValueRef arg, 1211 LLVMTypeRef dst_type) 1212 { 1213 const char *intr_name = (HAVE_LLVM < 0x0400) ? "llvm.AMDGPU.flbit.i32" : 1214 "llvm.amdgcn.sffbh.i32"; 1215 LLVMValueRef msb = ac_build_intrinsic(ctx, intr_name, 1216 dst_type, &arg, 1, 1217 AC_FUNC_ATTR_READNONE); 1218 1219 /* The HW returns the last bit index from MSB, but NIR/TGSI wants 1220 * the index from LSB. Invert it by doing "31 - msb". 
*/ 1221 msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false), 1222 msb, ""); 1223 1224 LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true); 1225 LLVMValueRef cond = LLVMBuildOr(ctx->builder, 1226 LLVMBuildICmp(ctx->builder, LLVMIntEQ, 1227 arg, LLVMConstInt(ctx->i32, 0, 0), ""), 1228 LLVMBuildICmp(ctx->builder, LLVMIntEQ, 1229 arg, all_ones, ""), ""); 1230 1231 return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, ""); 1232 } 1233 1234 LLVMValueRef 1235 ac_build_umsb(struct ac_llvm_context *ctx, 1236 LLVMValueRef arg, 1237 LLVMTypeRef dst_type) 1238 { 1239 LLVMValueRef args[2] = { 1240 arg, 1241 ctx->i1true, 1242 }; 1243 LLVMValueRef msb = ac_build_intrinsic(ctx, "llvm.ctlz.i32", 1244 dst_type, args, ARRAY_SIZE(args), 1245 AC_FUNC_ATTR_READNONE); 1246 1247 /* The HW returns the last bit index from MSB, but TGSI/NIR wants 1248 * the index from LSB. Invert it by doing "31 - msb". */ 1249 msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false), 1250 msb, ""); 1251 1252 /* check for zero */ 1253 return LLVMBuildSelect(ctx->builder, 1254 LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, 1255 LLVMConstInt(ctx->i32, 0, 0), ""), 1256 LLVMConstInt(ctx->i32, -1, true), msb, ""); 1257 } 1258 1259 LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a, 1260 LLVMValueRef b) 1261 { 1262 LLVMValueRef args[2] = {a, b}; 1263 return ac_build_intrinsic(ctx, "llvm.minnum.f32", ctx->f32, args, 2, 1264 AC_FUNC_ATTR_READNONE); 1265 } 1266 1267 LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a, 1268 LLVMValueRef b) 1269 { 1270 LLVMValueRef args[2] = {a, b}; 1271 return ac_build_intrinsic(ctx, "llvm.maxnum.f32", ctx->f32, args, 2, 1272 AC_FUNC_ATTR_READNONE); 1273 } 1274 1275 LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, 1276 LLVMValueRef b) 1277 { 1278 LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b, ""); 1279 return LLVMBuildSelect(ctx->builder, cmp, a, b, ""); 1280 } 1281 1282 
LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value) 1283 { 1284 if (HAVE_LLVM >= 0x0500) { 1285 return ac_build_fmin(ctx, ac_build_fmax(ctx, value, ctx->f32_0), 1286 ctx->f32_1); 1287 } 1288 1289 LLVMValueRef args[3] = { 1290 value, 1291 LLVMConstReal(ctx->f32, 0), 1292 LLVMConstReal(ctx->f32, 1), 1293 }; 1294 1295 return ac_build_intrinsic(ctx, "llvm.AMDGPU.clamp.", ctx->f32, args, 3, 1296 AC_FUNC_ATTR_READNONE | 1297 AC_FUNC_ATTR_LEGACY); 1298 } 1299 1300 void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a) 1301 { 1302 LLVMValueRef args[9]; 1303 1304 if (HAVE_LLVM >= 0x0500) { 1305 args[0] = LLVMConstInt(ctx->i32, a->target, 0); 1306 args[1] = LLVMConstInt(ctx->i32, a->enabled_channels, 0); 1307 1308 if (a->compr) { 1309 LLVMTypeRef i16 = LLVMInt16TypeInContext(ctx->context); 1310 LLVMTypeRef v2i16 = LLVMVectorType(i16, 2); 1311 1312 args[2] = LLVMBuildBitCast(ctx->builder, a->out[0], 1313 v2i16, ""); 1314 args[3] = LLVMBuildBitCast(ctx->builder, a->out[1], 1315 v2i16, ""); 1316 args[4] = LLVMConstInt(ctx->i1, a->done, 0); 1317 args[5] = LLVMConstInt(ctx->i1, a->valid_mask, 0); 1318 1319 ac_build_intrinsic(ctx, "llvm.amdgcn.exp.compr.v2i16", 1320 ctx->voidt, args, 6, 0); 1321 } else { 1322 args[2] = a->out[0]; 1323 args[3] = a->out[1]; 1324 args[4] = a->out[2]; 1325 args[5] = a->out[3]; 1326 args[6] = LLVMConstInt(ctx->i1, a->done, 0); 1327 args[7] = LLVMConstInt(ctx->i1, a->valid_mask, 0); 1328 1329 ac_build_intrinsic(ctx, "llvm.amdgcn.exp.f32", 1330 ctx->voidt, args, 8, 0); 1331 } 1332 return; 1333 } 1334 1335 args[0] = LLVMConstInt(ctx->i32, a->enabled_channels, 0); 1336 args[1] = LLVMConstInt(ctx->i32, a->valid_mask, 0); 1337 args[2] = LLVMConstInt(ctx->i32, a->done, 0); 1338 args[3] = LLVMConstInt(ctx->i32, a->target, 0); 1339 args[4] = LLVMConstInt(ctx->i32, a->compr, 0); 1340 memcpy(args + 5, a->out, sizeof(a->out[0]) * 4); 1341 1342 ac_build_intrinsic(ctx, "llvm.SI.export", ctx->voidt, args, 9, 1343 
AC_FUNC_ATTR_LEGACY); 1344 } 1345 1346 LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, 1347 struct ac_image_args *a) 1348 { 1349 LLVMTypeRef dst_type; 1350 LLVMValueRef args[11]; 1351 unsigned num_args = 0; 1352 const char *name = NULL; 1353 char intr_name[128], type[64]; 1354 1355 if (HAVE_LLVM >= 0x0400) { 1356 bool sample = a->opcode == ac_image_sample || 1357 a->opcode == ac_image_gather4 || 1358 a->opcode == ac_image_get_lod; 1359 1360 if (sample) 1361 args[num_args++] = ac_to_float(ctx, a->addr); 1362 else 1363 args[num_args++] = a->addr; 1364 1365 args[num_args++] = a->resource; 1366 if (sample) 1367 args[num_args++] = a->sampler; 1368 args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0); 1369 if (sample) 1370 args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0); 1371 args[num_args++] = ctx->i1false; /* glc */ 1372 args[num_args++] = ctx->i1false; /* slc */ 1373 args[num_args++] = ctx->i1false; /* lwe */ 1374 args[num_args++] = LLVMConstInt(ctx->i1, a->da, 0); 1375 1376 switch (a->opcode) { 1377 case ac_image_sample: 1378 name = "llvm.amdgcn.image.sample"; 1379 break; 1380 case ac_image_gather4: 1381 name = "llvm.amdgcn.image.gather4"; 1382 break; 1383 case ac_image_load: 1384 name = "llvm.amdgcn.image.load"; 1385 break; 1386 case ac_image_load_mip: 1387 name = "llvm.amdgcn.image.load.mip"; 1388 break; 1389 case ac_image_get_lod: 1390 name = "llvm.amdgcn.image.getlod"; 1391 break; 1392 case ac_image_get_resinfo: 1393 name = "llvm.amdgcn.image.getresinfo"; 1394 break; 1395 default: 1396 unreachable("invalid image opcode"); 1397 } 1398 1399 ac_build_type_name_for_intr(LLVMTypeOf(args[0]), type, 1400 sizeof(type)); 1401 1402 snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.v4f32.%s.v8i32", 1403 name, 1404 a->compare ? ".c" : "", 1405 a->bias ? ".b" : 1406 a->lod ? ".l" : 1407 a->deriv ? ".d" : 1408 a->level_zero ? ".lz" : "", 1409 a->offset ? 
".o" : "", 1410 type); 1411 1412 LLVMValueRef result = 1413 ac_build_intrinsic(ctx, intr_name, 1414 ctx->v4f32, args, num_args, 1415 AC_FUNC_ATTR_READNONE); 1416 if (!sample) { 1417 result = LLVMBuildBitCast(ctx->builder, result, 1418 ctx->v4i32, ""); 1419 } 1420 return result; 1421 } 1422 1423 args[num_args++] = a->addr; 1424 args[num_args++] = a->resource; 1425 1426 if (a->opcode == ac_image_load || 1427 a->opcode == ac_image_load_mip || 1428 a->opcode == ac_image_get_resinfo) { 1429 dst_type = ctx->v4i32; 1430 } else { 1431 dst_type = ctx->v4f32; 1432 args[num_args++] = a->sampler; 1433 } 1434 1435 args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0); 1436 args[num_args++] = LLVMConstInt(ctx->i32, a->unorm, 0); 1437 args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */ 1438 args[num_args++] = LLVMConstInt(ctx->i32, a->da, 0); 1439 args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */ 1440 args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */ 1441 args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */ 1442 args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */ 1443 1444 switch (a->opcode) { 1445 case ac_image_sample: 1446 name = "llvm.SI.image.sample"; 1447 break; 1448 case ac_image_gather4: 1449 name = "llvm.SI.gather4"; 1450 break; 1451 case ac_image_load: 1452 name = "llvm.SI.image.load"; 1453 break; 1454 case ac_image_load_mip: 1455 name = "llvm.SI.image.load.mip"; 1456 break; 1457 case ac_image_get_lod: 1458 name = "llvm.SI.getlod"; 1459 break; 1460 case ac_image_get_resinfo: 1461 name = "llvm.SI.getresinfo"; 1462 break; 1463 } 1464 1465 ac_build_type_name_for_intr(LLVMTypeOf(a->addr), type, sizeof(type)); 1466 snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.%s", 1467 name, 1468 a->compare ? ".c" : "", 1469 a->bias ? ".b" : 1470 a->lod ? ".l" : 1471 a->deriv ? ".d" : 1472 a->level_zero ? ".lz" : "", 1473 a->offset ? 
".o" : "", 1474 type); 1475 1476 return ac_build_intrinsic(ctx, intr_name, 1477 dst_type, args, num_args, 1478 AC_FUNC_ATTR_READNONE | 1479 AC_FUNC_ATTR_LEGACY); 1480 } 1481 1482 LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, 1483 LLVMValueRef args[2]) 1484 { 1485 if (HAVE_LLVM >= 0x0500) { 1486 LLVMTypeRef v2f16 = 1487 LLVMVectorType(LLVMHalfTypeInContext(ctx->context), 2); 1488 LLVMValueRef res = 1489 ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz", 1490 v2f16, args, 2, 1491 AC_FUNC_ATTR_READNONE); 1492 return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); 1493 } 1494 1495 return ac_build_intrinsic(ctx, "llvm.SI.packf16", ctx->i32, args, 2, 1496 AC_FUNC_ATTR_READNONE | 1497 AC_FUNC_ATTR_LEGACY); 1498 } 1499 1500 LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1) 1501 { 1502 assert(HAVE_LLVM >= 0x0600); 1503 return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1, 1504 &i1, 1, AC_FUNC_ATTR_READNONE); 1505 } 1506 1507 void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1) 1508 { 1509 if (HAVE_LLVM >= 0x0600) { 1510 ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt, 1511 &i1, 1, 0); 1512 return; 1513 } 1514 1515 LLVMValueRef value = LLVMBuildSelect(ctx->builder, i1, 1516 LLVMConstReal(ctx->f32, 1), 1517 LLVMConstReal(ctx->f32, -1), ""); 1518 ac_build_intrinsic(ctx, "llvm.AMDGPU.kill", ctx->voidt, 1519 &value, 1, AC_FUNC_ATTR_LEGACY); 1520 } 1521 1522 LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input, 1523 LLVMValueRef offset, LLVMValueRef width, 1524 bool is_signed) 1525 { 1526 LLVMValueRef args[] = { 1527 input, 1528 offset, 1529 width, 1530 }; 1531 1532 if (HAVE_LLVM >= 0x0500) { 1533 return ac_build_intrinsic(ctx, 1534 is_signed ? "llvm.amdgcn.sbfe.i32" : 1535 "llvm.amdgcn.ubfe.i32", 1536 ctx->i32, args, 3, 1537 AC_FUNC_ATTR_READNONE); 1538 } 1539 1540 return ac_build_intrinsic(ctx, 1541 is_signed ? 
"llvm.AMDGPU.bfe.i32" : 1542 "llvm.AMDGPU.bfe.u32", 1543 ctx->i32, args, 3, 1544 AC_FUNC_ATTR_READNONE | 1545 AC_FUNC_ATTR_LEGACY); 1546 } 1547 1548 void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16) 1549 { 1550 LLVMValueRef args[1] = { 1551 LLVMConstInt(ctx->i32, simm16, false), 1552 }; 1553 ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt", 1554 ctx->voidt, args, 1, 0); 1555 } 1556 1557 void ac_get_image_intr_name(const char *base_name, 1558 LLVMTypeRef data_type, 1559 LLVMTypeRef coords_type, 1560 LLVMTypeRef rsrc_type, 1561 char *out_name, unsigned out_len) 1562 { 1563 char coords_type_name[8]; 1564 1565 ac_build_type_name_for_intr(coords_type, coords_type_name, 1566 sizeof(coords_type_name)); 1567 1568 if (HAVE_LLVM <= 0x0309) { 1569 snprintf(out_name, out_len, "%s.%s", base_name, coords_type_name); 1570 } else { 1571 char data_type_name[8]; 1572 char rsrc_type_name[8]; 1573 1574 ac_build_type_name_for_intr(data_type, data_type_name, 1575 sizeof(data_type_name)); 1576 ac_build_type_name_for_intr(rsrc_type, rsrc_type_name, 1577 sizeof(rsrc_type_name)); 1578 snprintf(out_name, out_len, "%s.%s.%s.%s", base_name, 1579 data_type_name, coords_type_name, rsrc_type_name); 1580 } 1581 } 1582 1583 #define AC_EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3) 1584 #define AC_EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5) 1585 1586 enum ac_ir_type { 1587 AC_IR_UNDEF, 1588 AC_IR_CONST, 1589 AC_IR_VALUE, 1590 }; 1591 1592 struct ac_vs_exp_chan 1593 { 1594 LLVMValueRef value; 1595 float const_float; 1596 enum ac_ir_type type; 1597 }; 1598 1599 struct ac_vs_exp_inst { 1600 unsigned offset; 1601 LLVMValueRef inst; 1602 struct ac_vs_exp_chan chan[4]; 1603 }; 1604 1605 struct ac_vs_exports { 1606 unsigned num; 1607 struct ac_vs_exp_inst exp[VARYING_SLOT_MAX]; 1608 }; 1609 1610 /* Return true if the PARAM export has been eliminated. 
*/ 1611 static bool ac_eliminate_const_output(uint8_t *vs_output_param_offset, 1612 uint32_t num_outputs, 1613 struct ac_vs_exp_inst *exp) 1614 { 1615 unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */ 1616 bool is_zero[4] = {}, is_one[4] = {}; 1617 1618 for (i = 0; i < 4; i++) { 1619 /* It's a constant expression. Undef outputs are eliminated too. */ 1620 if (exp->chan[i].type == AC_IR_UNDEF) { 1621 is_zero[i] = true; 1622 is_one[i] = true; 1623 } else if (exp->chan[i].type == AC_IR_CONST) { 1624 if (exp->chan[i].const_float == 0) 1625 is_zero[i] = true; 1626 else if (exp->chan[i].const_float == 1) 1627 is_one[i] = true; 1628 else 1629 return false; /* other constant */ 1630 } else 1631 return false; 1632 } 1633 1634 /* Only certain combinations of 0 and 1 can be eliminated. */ 1635 if (is_zero[0] && is_zero[1] && is_zero[2]) 1636 default_val = is_zero[3] ? 0 : 1; 1637 else if (is_one[0] && is_one[1] && is_one[2]) 1638 default_val = is_zero[3] ? 2 : 3; 1639 else 1640 return false; 1641 1642 /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */ 1643 LLVMInstructionEraseFromParent(exp->inst); 1644 1645 /* Change OFFSET to DEFAULT_VAL. */ 1646 for (i = 0; i < num_outputs; i++) { 1647 if (vs_output_param_offset[i] == exp->offset) { 1648 vs_output_param_offset[i] = 1649 AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val; 1650 break; 1651 } 1652 } 1653 return true; 1654 } 1655 1656 static bool ac_eliminate_duplicated_output(uint8_t *vs_output_param_offset, 1657 uint32_t num_outputs, 1658 struct ac_vs_exports *processed, 1659 struct ac_vs_exp_inst *exp) 1660 { 1661 unsigned p, copy_back_channels = 0; 1662 1663 /* See if the output is already in the list of processed outputs. 1664 * The LLVMValueRef comparison relies on SSA. 
1665 */ 1666 for (p = 0; p < processed->num; p++) { 1667 bool different = false; 1668 1669 for (unsigned j = 0; j < 4; j++) { 1670 struct ac_vs_exp_chan *c1 = &processed->exp[p].chan[j]; 1671 struct ac_vs_exp_chan *c2 = &exp->chan[j]; 1672 1673 /* Treat undef as a match. */ 1674 if (c2->type == AC_IR_UNDEF) 1675 continue; 1676 1677 /* If c1 is undef but c2 isn't, we can copy c2 to c1 1678 * and consider the instruction duplicated. 1679 */ 1680 if (c1->type == AC_IR_UNDEF) { 1681 copy_back_channels |= 1 << j; 1682 continue; 1683 } 1684 1685 /* Test whether the channels are not equal. */ 1686 if (c1->type != c2->type || 1687 (c1->type == AC_IR_CONST && 1688 c1->const_float != c2->const_float) || 1689 (c1->type == AC_IR_VALUE && 1690 c1->value != c2->value)) { 1691 different = true; 1692 break; 1693 } 1694 } 1695 if (!different) 1696 break; 1697 1698 copy_back_channels = 0; 1699 } 1700 if (p == processed->num) 1701 return false; 1702 1703 /* If a match was found, but the matching export has undef where the new 1704 * one has a normal value, copy the normal value to the undef channel. 1705 */ 1706 struct ac_vs_exp_inst *match = &processed->exp[p]; 1707 1708 while (copy_back_channels) { 1709 unsigned chan = u_bit_scan(©_back_channels); 1710 1711 assert(match->chan[chan].type == AC_IR_UNDEF); 1712 LLVMSetOperand(match->inst, AC_EXP_OUT0 + chan, 1713 exp->chan[chan].value); 1714 match->chan[chan] = exp->chan[chan]; 1715 } 1716 1717 /* The PARAM export is duplicated. Kill it. */ 1718 LLVMInstructionEraseFromParent(exp->inst); 1719 1720 /* Change OFFSET to the matching export. 
*/ 1721 for (unsigned i = 0; i < num_outputs; i++) { 1722 if (vs_output_param_offset[i] == exp->offset) { 1723 vs_output_param_offset[i] = match->offset; 1724 break; 1725 } 1726 } 1727 return true; 1728 } 1729 1730 void ac_optimize_vs_outputs(struct ac_llvm_context *ctx, 1731 LLVMValueRef main_fn, 1732 uint8_t *vs_output_param_offset, 1733 uint32_t num_outputs, 1734 uint8_t *num_param_exports) 1735 { 1736 LLVMBasicBlockRef bb; 1737 bool removed_any = false; 1738 struct ac_vs_exports exports; 1739 1740 exports.num = 0; 1741 1742 /* Process all LLVM instructions. */ 1743 bb = LLVMGetFirstBasicBlock(main_fn); 1744 while (bb) { 1745 LLVMValueRef inst = LLVMGetFirstInstruction(bb); 1746 1747 while (inst) { 1748 LLVMValueRef cur = inst; 1749 inst = LLVMGetNextInstruction(inst); 1750 struct ac_vs_exp_inst exp; 1751 1752 if (LLVMGetInstructionOpcode(cur) != LLVMCall) 1753 continue; 1754 1755 LLVMValueRef callee = ac_llvm_get_called_value(cur); 1756 1757 if (!ac_llvm_is_function(callee)) 1758 continue; 1759 1760 const char *name = LLVMGetValueName(callee); 1761 unsigned num_args = LLVMCountParams(callee); 1762 1763 /* Check if this is an export instruction. */ 1764 if ((num_args != 9 && num_args != 8) || 1765 (strcmp(name, "llvm.SI.export") && 1766 strcmp(name, "llvm.amdgcn.exp.f32"))) 1767 continue; 1768 1769 LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET); 1770 unsigned target = LLVMConstIntGetZExtValue(arg); 1771 1772 if (target < V_008DFC_SQ_EXP_PARAM) 1773 continue; 1774 1775 target -= V_008DFC_SQ_EXP_PARAM; 1776 1777 /* Parse the instruction. 
*/ 1778 memset(&exp, 0, sizeof(exp)); 1779 exp.offset = target; 1780 exp.inst = cur; 1781 1782 for (unsigned i = 0; i < 4; i++) { 1783 LLVMValueRef v = LLVMGetOperand(cur, AC_EXP_OUT0 + i); 1784 1785 exp.chan[i].value = v; 1786 1787 if (LLVMIsUndef(v)) { 1788 exp.chan[i].type = AC_IR_UNDEF; 1789 } else if (LLVMIsAConstantFP(v)) { 1790 LLVMBool loses_info; 1791 exp.chan[i].type = AC_IR_CONST; 1792 exp.chan[i].const_float = 1793 LLVMConstRealGetDouble(v, &loses_info); 1794 } else { 1795 exp.chan[i].type = AC_IR_VALUE; 1796 } 1797 } 1798 1799 /* Eliminate constant and duplicated PARAM exports. */ 1800 if (ac_eliminate_const_output(vs_output_param_offset, 1801 num_outputs, &exp) || 1802 ac_eliminate_duplicated_output(vs_output_param_offset, 1803 num_outputs, &exports, 1804 &exp)) { 1805 removed_any = true; 1806 } else { 1807 exports.exp[exports.num++] = exp; 1808 } 1809 } 1810 bb = LLVMGetNextBasicBlock(bb); 1811 } 1812 1813 /* Remove holes in export memory due to removed PARAM exports. 1814 * This is done by renumbering all PARAM exports. 1815 */ 1816 if (removed_any) { 1817 uint8_t old_offset[VARYING_SLOT_MAX]; 1818 unsigned out, i; 1819 1820 /* Make a copy of the offsets. We need the old version while 1821 * we are modifying some of them. */ 1822 memcpy(old_offset, vs_output_param_offset, 1823 sizeof(old_offset)); 1824 1825 for (i = 0; i < exports.num; i++) { 1826 unsigned offset = exports.exp[i].offset; 1827 1828 /* Update vs_output_param_offset. Multiple outputs can 1829 * have the same offset. 1830 */ 1831 for (out = 0; out < num_outputs; out++) { 1832 if (old_offset[out] == offset) 1833 vs_output_param_offset[out] = i; 1834 } 1835 1836 /* Change the PARAM offset in the instruction. 
*/ 1837 LLVMSetOperand(exports.exp[i].inst, AC_EXP_TARGET, 1838 LLVMConstInt(ctx->i32, 1839 V_008DFC_SQ_EXP_PARAM + i, 0)); 1840 } 1841 *num_param_exports = exports.num; 1842 } 1843 } 1844 1845 void ac_init_exec_full_mask(struct ac_llvm_context *ctx) 1846 { 1847 LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0); 1848 ac_build_intrinsic(ctx, 1849 "llvm.amdgcn.init.exec", ctx->voidt, 1850 &full_mask, 1, AC_FUNC_ATTR_CONVERGENT); 1851 } 1852 1853 void ac_declare_lds_as_pointer(struct ac_llvm_context *ctx) 1854 { 1855 unsigned lds_size = ctx->chip_class >= CIK ? 65536 : 32768; 1856 ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32_0, 1857 LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_LOCAL_ADDR_SPACE), 1858 "lds"); 1859 } 1860 1861 LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx, 1862 LLVMValueRef dw_addr) 1863 { 1864 return ac_build_load(ctx, ctx->lds, dw_addr); 1865 } 1866 1867 void ac_lds_store(struct ac_llvm_context *ctx, 1868 LLVMValueRef dw_addr, 1869 LLVMValueRef value) 1870 { 1871 value = ac_to_integer(ctx, value); 1872 ac_build_indexed_store(ctx, ctx->lds, 1873 dw_addr, value); 1874 } 1875 1876 LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, 1877 LLVMTypeRef dst_type, 1878 LLVMValueRef src0) 1879 { 1880 LLVMValueRef params[2] = { 1881 src0, 1882 1883 /* The value of 1 means that ffs(x=0) = undef, so LLVM won't 1884 * add special code to check for x=0. The reason is that 1885 * the LLVM behavior for x=0 is different from what we 1886 * need here. However, LLVM also assumes that ffs(x) is 1887 * in [0, 31], but GLSL expects that ffs(0) = -1, so 1888 * a conditional assignment to handle 0 is still required. 1889 * 1890 * The hardware already implements the correct behavior. 1891 */ 1892 LLVMConstInt(ctx->i1, 1, false), 1893 }; 1894 1895 LLVMValueRef lsb = ac_build_intrinsic(ctx, "llvm.cttz.i32", ctx->i32, 1896 params, 2, 1897 AC_FUNC_ATTR_READNONE); 1898 1899 /* TODO: We need an intrinsic to skip this conditional. 
*/ 1900 /* Check for zero: */ 1901 return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, 1902 LLVMIntEQ, src0, 1903 ctx->i32_0, ""), 1904 LLVMConstInt(ctx->i32, -1, 0), lsb, ""); 1905 } 1906 1907 static struct ac_llvm_flow * 1908 get_current_flow(struct ac_llvm_context *ctx) 1909 { 1910 if (ctx->flow_depth > 0) 1911 return &ctx->flow[ctx->flow_depth - 1]; 1912 return NULL; 1913 } 1914 1915 static struct ac_llvm_flow * 1916 get_innermost_loop(struct ac_llvm_context *ctx) 1917 { 1918 for (unsigned i = ctx->flow_depth; i > 0; --i) { 1919 if (ctx->flow[i - 1].loop_entry_block) 1920 return &ctx->flow[i - 1]; 1921 } 1922 return NULL; 1923 } 1924 1925 static struct ac_llvm_flow * 1926 push_flow(struct ac_llvm_context *ctx) 1927 { 1928 struct ac_llvm_flow *flow; 1929 1930 if (ctx->flow_depth >= ctx->flow_depth_max) { 1931 unsigned new_max = MAX2(ctx->flow_depth << 1, 1932 AC_LLVM_INITIAL_CF_DEPTH); 1933 1934 ctx->flow = realloc(ctx->flow, new_max * sizeof(*ctx->flow)); 1935 ctx->flow_depth_max = new_max; 1936 } 1937 1938 flow = &ctx->flow[ctx->flow_depth]; 1939 ctx->flow_depth++; 1940 1941 flow->next_block = NULL; 1942 flow->loop_entry_block = NULL; 1943 return flow; 1944 } 1945 1946 static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, 1947 int label_id) 1948 { 1949 char buf[32]; 1950 snprintf(buf, sizeof(buf), "%s%d", base, label_id); 1951 LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf); 1952 } 1953 1954 /* Append a basic block at the level of the parent flow. 
1955 */ 1956 static LLVMBasicBlockRef append_basic_block(struct ac_llvm_context *ctx, 1957 const char *name) 1958 { 1959 assert(ctx->flow_depth >= 1); 1960 1961 if (ctx->flow_depth >= 2) { 1962 struct ac_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2]; 1963 1964 return LLVMInsertBasicBlockInContext(ctx->context, 1965 flow->next_block, name); 1966 } 1967 1968 LLVMValueRef main_fn = 1969 LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder)); 1970 return LLVMAppendBasicBlockInContext(ctx->context, main_fn, name); 1971 } 1972 1973 /* Emit a branch to the given default target for the current block if 1974 * applicable -- that is, if the current block does not already contain a 1975 * branch from a break or continue. 1976 */ 1977 static void emit_default_branch(LLVMBuilderRef builder, 1978 LLVMBasicBlockRef target) 1979 { 1980 if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder))) 1981 LLVMBuildBr(builder, target); 1982 } 1983 1984 void ac_build_bgnloop(struct ac_llvm_context *ctx, int label_id) 1985 { 1986 struct ac_llvm_flow *flow = push_flow(ctx); 1987 flow->loop_entry_block = append_basic_block(ctx, "LOOP"); 1988 flow->next_block = append_basic_block(ctx, "ENDLOOP"); 1989 set_basicblock_name(flow->loop_entry_block, "loop", label_id); 1990 LLVMBuildBr(ctx->builder, flow->loop_entry_block); 1991 LLVMPositionBuilderAtEnd(ctx->builder, flow->loop_entry_block); 1992 } 1993 1994 void ac_build_break(struct ac_llvm_context *ctx) 1995 { 1996 struct ac_llvm_flow *flow = get_innermost_loop(ctx); 1997 LLVMBuildBr(ctx->builder, flow->next_block); 1998 } 1999 2000 void ac_build_continue(struct ac_llvm_context *ctx) 2001 { 2002 struct ac_llvm_flow *flow = get_innermost_loop(ctx); 2003 LLVMBuildBr(ctx->builder, flow->loop_entry_block); 2004 } 2005 2006 void ac_build_else(struct ac_llvm_context *ctx, int label_id) 2007 { 2008 struct ac_llvm_flow *current_branch = get_current_flow(ctx); 2009 LLVMBasicBlockRef endif_block; 2010 2011 
assert(!current_branch->loop_entry_block); 2012 2013 endif_block = append_basic_block(ctx, "ENDIF"); 2014 emit_default_branch(ctx->builder, endif_block); 2015 2016 LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block); 2017 set_basicblock_name(current_branch->next_block, "else", label_id); 2018 2019 current_branch->next_block = endif_block; 2020 } 2021 2022 void ac_build_endif(struct ac_llvm_context *ctx, int label_id) 2023 { 2024 struct ac_llvm_flow *current_branch = get_current_flow(ctx); 2025 2026 assert(!current_branch->loop_entry_block); 2027 2028 emit_default_branch(ctx->builder, current_branch->next_block); 2029 LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block); 2030 set_basicblock_name(current_branch->next_block, "endif", label_id); 2031 2032 ctx->flow_depth--; 2033 } 2034 2035 void ac_build_endloop(struct ac_llvm_context *ctx, int label_id) 2036 { 2037 struct ac_llvm_flow *current_loop = get_current_flow(ctx); 2038 2039 assert(current_loop->loop_entry_block); 2040 2041 emit_default_branch(ctx->builder, current_loop->loop_entry_block); 2042 2043 LLVMPositionBuilderAtEnd(ctx->builder, current_loop->next_block); 2044 set_basicblock_name(current_loop->next_block, "endloop", label_id); 2045 ctx->flow_depth--; 2046 } 2047 2048 static void if_cond_emit(struct ac_llvm_context *ctx, LLVMValueRef cond, 2049 int label_id) 2050 { 2051 struct ac_llvm_flow *flow = push_flow(ctx); 2052 LLVMBasicBlockRef if_block; 2053 2054 if_block = append_basic_block(ctx, "IF"); 2055 flow->next_block = append_basic_block(ctx, "ELSE"); 2056 set_basicblock_name(if_block, "if", label_id); 2057 LLVMBuildCondBr(ctx->builder, cond, if_block, flow->next_block); 2058 LLVMPositionBuilderAtEnd(ctx->builder, if_block); 2059 } 2060 2061 void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value, 2062 int label_id) 2063 { 2064 LLVMValueRef cond = LLVMBuildFCmp(ctx->builder, LLVMRealUNE, 2065 value, ctx->f32_0, ""); 2066 if_cond_emit(ctx, cond, label_id); 2067 } 
2068 2069 void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value, 2070 int label_id) 2071 { 2072 LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, 2073 ac_to_integer(ctx, value), 2074 ctx->i32_0, ""); 2075 if_cond_emit(ctx, cond, label_id); 2076 } 2077