1 /* 2 * Copyright 2016 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "si_shader_internal.h" 25 #include "si_pipe.h" 26 #include "radeon/radeon_elf_util.h" 27 28 #include "gallivm/lp_bld_const.h" 29 #include "gallivm/lp_bld_gather.h" 30 #include "gallivm/lp_bld_flow.h" 31 #include "gallivm/lp_bld_init.h" 32 #include "gallivm/lp_bld_intr.h" 33 #include "gallivm/lp_bld_misc.h" 34 #include "gallivm/lp_bld_swizzle.h" 35 #include "tgsi/tgsi_info.h" 36 #include "tgsi/tgsi_parse.h" 37 #include "util/u_math.h" 38 #include "util/u_memory.h" 39 #include "util/u_debug.h" 40 41 #include <stdio.h> 42 #include <llvm-c/Transforms/IPO.h> 43 #include <llvm-c/Transforms/Scalar.h> 44 #include <llvm-c/Support.h> 45 46 /* Data for if/else/endif and bgnloop/endloop control flow structures. 
47 */ 48 struct si_llvm_flow { 49 /* Loop exit or next part of if/else/endif. */ 50 LLVMBasicBlockRef next_block; 51 LLVMBasicBlockRef loop_entry_block; 52 }; 53 54 #define CPU_STRING_LEN 30 55 #define FS_STRING_LEN 30 56 #define TRIPLE_STRING_LEN 7 57 58 /** 59 * Shader types for the LLVM backend. 60 */ 61 enum si_llvm_shader_type { 62 RADEON_LLVM_SHADER_PS = 0, 63 RADEON_LLVM_SHADER_VS = 1, 64 RADEON_LLVM_SHADER_GS = 2, 65 RADEON_LLVM_SHADER_CS = 3, 66 }; 67 68 enum si_llvm_calling_convention { 69 RADEON_LLVM_AMDGPU_VS = 87, 70 RADEON_LLVM_AMDGPU_GS = 88, 71 RADEON_LLVM_AMDGPU_PS = 89, 72 RADEON_LLVM_AMDGPU_CS = 90, 73 }; 74 75 void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value) 76 { 77 char str[16]; 78 79 snprintf(str, sizeof(str), "%i", value); 80 LLVMAddTargetDependentFunctionAttr(F, name, str); 81 } 82 83 /** 84 * Set the shader type we want to compile 85 * 86 * @param type shader type to set 87 */ 88 void si_llvm_shader_type(LLVMValueRef F, unsigned type) 89 { 90 enum si_llvm_shader_type llvm_type; 91 enum si_llvm_calling_convention calling_conv; 92 93 switch (type) { 94 case PIPE_SHADER_VERTEX: 95 case PIPE_SHADER_TESS_CTRL: 96 case PIPE_SHADER_TESS_EVAL: 97 llvm_type = RADEON_LLVM_SHADER_VS; 98 calling_conv = RADEON_LLVM_AMDGPU_VS; 99 break; 100 case PIPE_SHADER_GEOMETRY: 101 llvm_type = RADEON_LLVM_SHADER_GS; 102 calling_conv = RADEON_LLVM_AMDGPU_GS; 103 break; 104 case PIPE_SHADER_FRAGMENT: 105 llvm_type = RADEON_LLVM_SHADER_PS; 106 calling_conv = RADEON_LLVM_AMDGPU_PS; 107 break; 108 case PIPE_SHADER_COMPUTE: 109 llvm_type = RADEON_LLVM_SHADER_CS; 110 calling_conv = RADEON_LLVM_AMDGPU_CS; 111 break; 112 default: 113 unreachable("Unhandle shader type"); 114 } 115 116 if (HAVE_LLVM >= 0x309) 117 LLVMSetFunctionCallConv(F, calling_conv); 118 else 119 si_llvm_add_attribute(F, "ShaderType", llvm_type); 120 } 121 122 static void init_amdgpu_target() 123 { 124 gallivm_init_llvm_targets(); 125 #if HAVE_LLVM < 0x0307 126 
LLVMInitializeR600TargetInfo(); 127 LLVMInitializeR600Target(); 128 LLVMInitializeR600TargetMC(); 129 LLVMInitializeR600AsmPrinter(); 130 #else 131 LLVMInitializeAMDGPUTargetInfo(); 132 LLVMInitializeAMDGPUTarget(); 133 LLVMInitializeAMDGPUTargetMC(); 134 LLVMInitializeAMDGPUAsmPrinter(); 135 136 #endif 137 if (HAVE_LLVM >= 0x0400) { 138 /* 139 * Workaround for bug in llvm 4.0 that causes image intrinsics 140 * to disappear. 141 * https://reviews.llvm.org/D26348 142 */ 143 const char *argv[2] = {"mesa", "-simplifycfg-sink-common=false"}; 144 LLVMParseCommandLineOptions(2, argv, NULL); 145 } 146 } 147 148 static once_flag init_amdgpu_target_once_flag = ONCE_FLAG_INIT; 149 150 LLVMTargetRef si_llvm_get_amdgpu_target(const char *triple) 151 { 152 LLVMTargetRef target = NULL; 153 char *err_message = NULL; 154 155 call_once(&init_amdgpu_target_once_flag, init_amdgpu_target); 156 157 if (LLVMGetTargetFromTriple(triple, &target, &err_message)) { 158 fprintf(stderr, "Cannot find target for triple %s ", triple); 159 if (err_message) { 160 fprintf(stderr, "%s\n", err_message); 161 } 162 LLVMDisposeMessage(err_message); 163 return NULL; 164 } 165 return target; 166 } 167 168 struct si_llvm_diagnostics { 169 struct pipe_debug_callback *debug; 170 unsigned retval; 171 }; 172 173 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context) 174 { 175 struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context; 176 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di); 177 char *description = LLVMGetDiagInfoDescription(di); 178 const char *severity_str = NULL; 179 180 switch (severity) { 181 case LLVMDSError: 182 severity_str = "error"; 183 break; 184 case LLVMDSWarning: 185 severity_str = "warning"; 186 break; 187 case LLVMDSRemark: 188 severity_str = "remark"; 189 break; 190 case LLVMDSNote: 191 severity_str = "note"; 192 break; 193 default: 194 severity_str = "unknown"; 195 } 196 197 pipe_debug_message(diag->debug, SHADER_INFO, 198 "LLVM 
diagnostic (%s): %s", severity_str, description); 199 200 if (severity == LLVMDSError) { 201 diag->retval = 1; 202 fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description); 203 } 204 205 LLVMDisposeMessage(description); 206 } 207 208 /** 209 * Compile an LLVM module to machine code. 210 * 211 * @returns 0 for success, 1 for failure 212 */ 213 unsigned si_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary, 214 LLVMTargetMachineRef tm, 215 struct pipe_debug_callback *debug) 216 { 217 struct si_llvm_diagnostics diag; 218 char *err; 219 LLVMContextRef llvm_ctx; 220 LLVMMemoryBufferRef out_buffer; 221 unsigned buffer_size; 222 const char *buffer_data; 223 LLVMBool mem_err; 224 225 diag.debug = debug; 226 diag.retval = 0; 227 228 /* Setup Diagnostic Handler*/ 229 llvm_ctx = LLVMGetModuleContext(M); 230 231 LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag); 232 233 /* Compile IR*/ 234 mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err, 235 &out_buffer); 236 237 /* Process Errors/Warnings */ 238 if (mem_err) { 239 fprintf(stderr, "%s: %s", __FUNCTION__, err); 240 pipe_debug_message(debug, SHADER_INFO, 241 "LLVM emit error: %s", err); 242 FREE(err); 243 diag.retval = 1; 244 goto out; 245 } 246 247 /* Extract Shader Code*/ 248 buffer_size = LLVMGetBufferSize(out_buffer); 249 buffer_data = LLVMGetBufferStart(out_buffer); 250 251 radeon_elf_read(buffer_data, buffer_size, binary); 252 253 /* Clean up */ 254 LLVMDisposeMemoryBuffer(out_buffer); 255 256 out: 257 if (diag.retval != 0) 258 pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed"); 259 return diag.retval; 260 } 261 262 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base, 263 enum tgsi_opcode_type type) 264 { 265 LLVMContextRef ctx = bld_base->base.gallivm->context; 266 267 switch (type) { 268 case TGSI_TYPE_UNSIGNED: 269 case TGSI_TYPE_SIGNED: 270 return LLVMInt32TypeInContext(ctx); 271 case TGSI_TYPE_UNSIGNED64: 272 case 
TGSI_TYPE_SIGNED64: 273 return LLVMInt64TypeInContext(ctx); 274 case TGSI_TYPE_DOUBLE: 275 return LLVMDoubleTypeInContext(ctx); 276 case TGSI_TYPE_UNTYPED: 277 case TGSI_TYPE_FLOAT: 278 return LLVMFloatTypeInContext(ctx); 279 default: break; 280 } 281 return 0; 282 } 283 284 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base, 285 enum tgsi_opcode_type type, LLVMValueRef value) 286 { 287 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 288 LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type); 289 290 if (dst_type) 291 return LLVMBuildBitCast(builder, value, dst_type, ""); 292 else 293 return value; 294 } 295 296 /** 297 * Return a value that is equal to the given i32 \p index if it lies in [0,num) 298 * or an undefined value in the same interval otherwise. 299 */ 300 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx, 301 LLVMValueRef index, 302 unsigned num) 303 { 304 struct gallivm_state *gallivm = &ctx->gallivm; 305 LLVMBuilderRef builder = gallivm->builder; 306 LLVMValueRef c_max = lp_build_const_int32(gallivm, num - 1); 307 LLVMValueRef cc; 308 309 if (util_is_power_of_two(num)) { 310 index = LLVMBuildAnd(builder, index, c_max, ""); 311 } else { 312 /* In theory, this MAX pattern should result in code that is 313 * as good as the bit-wise AND above. 314 * 315 * In practice, LLVM generates worse code (at the time of 316 * writing), because its value tracking is not strong enough. 
317 */ 318 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, ""); 319 index = LLVMBuildSelect(builder, cc, index, c_max, ""); 320 } 321 322 return index; 323 } 324 325 static struct si_llvm_flow * 326 get_current_flow(struct si_shader_context *ctx) 327 { 328 if (ctx->flow_depth > 0) 329 return &ctx->flow[ctx->flow_depth - 1]; 330 return NULL; 331 } 332 333 static struct si_llvm_flow * 334 get_innermost_loop(struct si_shader_context *ctx) 335 { 336 for (unsigned i = ctx->flow_depth; i > 0; --i) { 337 if (ctx->flow[i - 1].loop_entry_block) 338 return &ctx->flow[i - 1]; 339 } 340 return NULL; 341 } 342 343 static struct si_llvm_flow * 344 push_flow(struct si_shader_context *ctx) 345 { 346 struct si_llvm_flow *flow; 347 348 if (ctx->flow_depth >= ctx->flow_depth_max) { 349 unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH); 350 ctx->flow = REALLOC(ctx->flow, 351 ctx->flow_depth_max * sizeof(*ctx->flow), 352 new_max * sizeof(*ctx->flow)); 353 ctx->flow_depth_max = new_max; 354 } 355 356 flow = &ctx->flow[ctx->flow_depth]; 357 ctx->flow_depth++; 358 359 flow->next_block = NULL; 360 flow->loop_entry_block = NULL; 361 return flow; 362 } 363 364 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base, 365 LLVMValueRef value, 366 unsigned swizzle_x, 367 unsigned swizzle_y, 368 unsigned swizzle_z, 369 unsigned swizzle_w) 370 { 371 LLVMValueRef swizzles[4]; 372 LLVMTypeRef i32t = 373 LLVMInt32TypeInContext(bld_base->base.gallivm->context); 374 375 swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0); 376 swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0); 377 swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0); 378 swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0); 379 380 return LLVMBuildShuffleVector(bld_base->base.gallivm->builder, 381 value, 382 LLVMGetUndef(LLVMTypeOf(value)), 383 LLVMConstVector(swizzles, 4), ""); 384 } 385 386 /** 387 * Return the description of the array covering the given temporary register 388 * index. 
 */
static unsigned
get_temp_array_id(struct lp_build_tgsi_context *bld_base,
		  unsigned reg_index,
		  const struct tgsi_ind_register *reg)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
	unsigned i;

	/* A valid ArrayID on the indirect register wins outright. */
	if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
		return reg->ArrayID;

	/* Otherwise scan the declared arrays for one whose range covers the
	 * register index. Returned IDs are 1-based; 0 means "no array".
	 */
	for (i = 0; i < num_arrays; i++) {
		const struct tgsi_array_info *array = &ctx->temp_arrays[i];

		if (reg_index >= array->range.First && reg_index <= array->range.Last)
			return i + 1;
	}

	return 0;
}

/* Return the register range covered by the array containing (File, reg_index),
 * falling back to the full declared range of the register file when no
 * temporary array matches.
 */
static struct tgsi_declaration_range
get_array_range(struct lp_build_tgsi_context *bld_base,
		unsigned File, unsigned reg_index,
		const struct tgsi_ind_register *reg)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct tgsi_declaration_range range;

	if (File == TGSI_FILE_TEMPORARY) {
		unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
		if (array_id)
			return ctx->temp_arrays[array_id - 1].range;
	}

	range.First = 0;
	range.Last = bld_base->info->file_max[File];
	return range;
}

/* Build the i32 index value for an (optionally indirect) register access:
 * a constant \p offset when there is no indirect register, otherwise the
 * loaded address register plus \p offset.
 */
static LLVMValueRef
emit_array_index(struct si_shader_context *ctx,
		 const struct tgsi_ind_register *reg,
		 unsigned offset)
{
	struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;

	if (!reg) {
		return lp_build_const_int32(gallivm, offset);
	}
	LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, ctx->addrs[reg->Index][reg->Swizzle], "");
	return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), "");
}

/**
 * For indirect registers, construct a pointer directly to the requested
 * element using getelementptr if possible.
 *
 * Returns NULL if the insertelement/extractelement fallback for array access
 * must be used.
 */
static LLVMValueRef
get_pointer_into_array(struct si_shader_context *ctx,
		       unsigned file,
		       unsigned swizzle,
		       unsigned reg_index,
		       const struct tgsi_ind_register *reg_indirect)
{
	unsigned array_id;
	struct tgsi_array_info *array;
	struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef idxs[2];
	LLVMValueRef index;
	LLVMValueRef alloca;

	/* Only TGSI temporaries that were declared as arrays and got a
	 * backing alloca can use the GEP fast path.
	 */
	if (file != TGSI_FILE_TEMPORARY)
		return NULL;

	array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
	if (!array_id)
		return NULL;

	alloca = ctx->temp_array_allocas[array_id - 1];
	if (!alloca)
		return NULL;

	array = &ctx->temp_arrays[array_id - 1];

	/* Channels never written get the shared dummy alloca (safe to read). */
	if (!(array->writemask & (1 << swizzle)))
		return ctx->undef_alloca;

	index = emit_array_index(ctx, reg_indirect,
				 reg_index - ctx->temp_arrays[array_id - 1].range.First);

	/* Ensure that the index is within a valid range, to guard against
	 * VM faults and overwriting critical data (e.g. spilled resource
	 * descriptors).
	 *
	 * TODO It should be possible to avoid the additional instructions
	 * if LLVM is changed so that it guarantees:
	 * 1. the scratch space descriptor isolates the current wave (this
	 *    could even save the scratch offset SGPR at the cost of an
	 *    additional SALU instruction)
	 * 2. the memory for allocas must be allocated at the _end_ of the
	 *    scratch space (after spilled registers)
	 */
	index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);

	/* The alloca is packed: only written channels are stored, so scale
	 * by the number of written channels and add the channel's slot.
	 */
	index = LLVMBuildMul(
		builder, index,
		lp_build_const_int32(gallivm, util_bitcount(array->writemask)),
		"");
	index = LLVMBuildAdd(
		builder, index,
		lp_build_const_int32(
			gallivm,
			util_bitcount(array->writemask & ((1 << swizzle) - 1))),
		"");
	idxs[0] = ctx->bld_base.uint_bld.zero;
	idxs[1] = index;
	return LLVMBuildGEP(builder, alloca, idxs, 2, "");
}

/* Combine two 32-bit values (low word \p ptr, high word \p ptr2) into a
 * single value of the 64-bit TGSI type \p type via an i32 vector.
 */
LLVMValueRef
si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
			 enum tgsi_opcode_type type,
			 LLVMValueRef ptr,
			 LLVMValueRef ptr2)
{
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	LLVMValueRef result;

	result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));

	result = LLVMBuildInsertElement(builder,
					result,
					bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
					bld_base->int_bld.zero, "");
	result = LLVMBuildInsertElement(builder,
					result,
					bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
					bld_base->int_bld.one, "");
	return bitcast(bld_base, type, result);
}

/* Fetch every register in \p range (one channel each, selected by
 * \p swizzle) and gather the loads into a single vector for dynamic
 * extractelement indexing.
 */
static LLVMValueRef
emit_array_fetch(struct lp_build_tgsi_context *bld_base,
		 unsigned File, enum tgsi_opcode_type type,
		 struct tgsi_declaration_range range,
		 unsigned swizzle)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;

	LLVMBuilderRef builder = bld_base->base.gallivm->builder;

	unsigned i, size = range.Last - range.First + 1;
	LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
	LLVMValueRef result = LLVMGetUndef(vec);

	struct tgsi_full_src_register tmp_reg = {};
	tmp_reg.Register.File = File;

	for (i = 0; i < size; ++i) {
		tmp_reg.Register.Index = i + range.First;
		LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
		result = LLVMBuildInsertElement(builder, result, temp,
						lp_build_const_int32(gallivm, i), "array_vector");
	}
	return result;
}

/* Load one channel of an indirectly addressed register: via GEP+load from
 * the array alloca when possible, otherwise via the gather/extractelement
 * fallback. For 64-bit types the adjacent high dword is loaded as well.
 */
static LLVMValueRef
load_value_from_array(struct lp_build_tgsi_context *bld_base,
		      unsigned file,
		      enum tgsi_opcode_type type,
		      unsigned swizzle,
		      unsigned reg_index,
		      const struct tgsi_ind_register *reg_indirect)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef ptr;

	ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
	if (ptr) {
		LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
		if (tgsi_type_is_64bit(type)) {
			LLVMValueRef ptr_hi, val_hi;
			ptr_hi = LLVMBuildGEP(builder, ptr, &bld_base->uint_bld.one, 1, "");
			val_hi = LLVMBuildLoad(builder, ptr_hi, "");
			val = si_llvm_emit_fetch_64bit(bld_base, type, val, val_hi);
		}

		return val;
	} else {
		struct tgsi_declaration_range range =
			get_array_range(bld_base, file, reg_index, reg_indirect);
		LLVMValueRef index =
			emit_array_index(ctx, reg_indirect, reg_index - range.First);
		LLVMValueRef array =
			emit_array_fetch(bld_base, file, type, range, swizzle);
		return LLVMBuildExtractElement(builder, array, index, "");
	}
}

/* Store one channel of an indirectly addressed register: direct store via
 * GEP when possible, otherwise gather the range into a vector, insert the
 * value at the dynamic index, and write every element back.
 */
static void
store_value_to_array(struct lp_build_tgsi_context *bld_base,
		     LLVMValueRef value,
		     unsigned file,
		     unsigned chan_index,
		     unsigned reg_index,
		     const struct tgsi_ind_register *reg_indirect)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef ptr;

	ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
	if (ptr) {
		LLVMBuildStore(builder, value, ptr);
	} else {
		unsigned i, size;
		struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
		LLVMValueRef index = emit_array_index(ctx, reg_indirect, reg_index - range.First);
		LLVMValueRef array =
			emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
		LLVMValueRef temp_ptr;

		array = LLVMBuildInsertElement(builder, array, value, index, "");

		size = range.Last - range.First + 1;
		for (i = 0; i < size; ++i) {
			switch(file) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = ctx->outputs[i + range.First][chan_index];
				break;

			case TGSI_FILE_TEMPORARY:
				if (range.First + i >= ctx->temps_count)
					continue;
				temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
				break;

			default:
				continue;
			}
			value = LLVMBuildExtractElement(builder, array,
							lp_build_const_int32(gallivm, i), "");
			LLVMBuildStore(builder, value, temp_ptr);
		}
	}
}

/* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
 * reload them at each use. This must be true if the shader is using
 * derivatives, because all inputs should be loaded in the WQM mode.
651 */ 652 static bool si_preload_fs_inputs(struct si_shader_context *ctx) 653 { 654 return ctx->shader->selector->info.uses_derivatives; 655 } 656 657 static LLVMValueRef 658 get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index, 659 unsigned chan) 660 { 661 struct si_shader_context *ctx = si_shader_context(bld_base); 662 663 assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]); 664 return ctx->outputs[index][chan]; 665 } 666 667 LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base, 668 const struct tgsi_full_src_register *reg, 669 enum tgsi_opcode_type type, 670 unsigned swizzle) 671 { 672 struct si_shader_context *ctx = si_shader_context(bld_base); 673 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 674 LLVMValueRef result = NULL, ptr, ptr2; 675 676 if (swizzle == ~0) { 677 LLVMValueRef values[TGSI_NUM_CHANNELS]; 678 unsigned chan; 679 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 680 values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan); 681 } 682 return lp_build_gather_values(bld_base->base.gallivm, values, 683 TGSI_NUM_CHANNELS); 684 } 685 686 if (reg->Register.Indirect) { 687 LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type, 688 swizzle, reg->Register.Index, ®->Indirect); 689 return bitcast(bld_base, type, load); 690 } 691 692 switch(reg->Register.File) { 693 case TGSI_FILE_IMMEDIATE: { 694 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type); 695 if (tgsi_type_is_64bit(type)) { 696 result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2)); 697 result = LLVMConstInsertElement(result, 698 ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], 699 bld_base->int_bld.zero); 700 result = LLVMConstInsertElement(result, 701 ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1], 702 bld_base->int_bld.one); 703 return LLVMConstBitCast(result, ctype); 704 } else { 705 return 
LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype); 706 } 707 } 708 709 case TGSI_FILE_INPUT: { 710 unsigned index = reg->Register.Index; 711 LLVMValueRef input[4]; 712 713 /* I don't think doing this for vertex shaders is beneficial. 714 * For those, we want to make sure the VMEM loads are executed 715 * only once. Fragment shaders don't care much, because 716 * v_interp instructions are much cheaper than VMEM loads. 717 */ 718 if (!si_preload_fs_inputs(ctx) && 719 ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT) 720 ctx->load_input(ctx, index, &ctx->input_decls[index], input); 721 else 722 memcpy(input, &ctx->inputs[index * 4], sizeof(input)); 723 724 result = input[swizzle]; 725 726 if (tgsi_type_is_64bit(type)) { 727 ptr = result; 728 ptr2 = input[swizzle + 1]; 729 return si_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2); 730 } 731 break; 732 } 733 734 case TGSI_FILE_TEMPORARY: 735 if (reg->Register.Index >= ctx->temps_count) 736 return LLVMGetUndef(tgsi2llvmtype(bld_base, type)); 737 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle]; 738 if (tgsi_type_is_64bit(type)) { 739 ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1]; 740 return si_llvm_emit_fetch_64bit(bld_base, type, 741 LLVMBuildLoad(builder, ptr, ""), 742 LLVMBuildLoad(builder, ptr2, "")); 743 } 744 result = LLVMBuildLoad(builder, ptr, ""); 745 break; 746 747 case TGSI_FILE_OUTPUT: 748 ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle); 749 if (tgsi_type_is_64bit(type)) { 750 ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1); 751 return si_llvm_emit_fetch_64bit(bld_base, type, 752 LLVMBuildLoad(builder, ptr, ""), 753 LLVMBuildLoad(builder, ptr2, "")); 754 } 755 result = LLVMBuildLoad(builder, ptr, ""); 756 break; 757 758 default: 759 return LLVMGetUndef(tgsi2llvmtype(bld_base, type)); 760 } 761 762 return bitcast(bld_base, type, result); 763 } 764 765 static LLVMValueRef 
fetch_system_value(struct lp_build_tgsi_context *bld_base, 766 const struct tgsi_full_src_register *reg, 767 enum tgsi_opcode_type type, 768 unsigned swizzle) 769 { 770 struct si_shader_context *ctx = si_shader_context(bld_base); 771 struct gallivm_state *gallivm = bld_base->base.gallivm; 772 773 LLVMValueRef cval = ctx->system_values[reg->Register.Index]; 774 if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) { 775 cval = LLVMBuildExtractElement(gallivm->builder, cval, 776 lp_build_const_int32(gallivm, swizzle), ""); 777 } 778 return bitcast(bld_base, type, cval); 779 } 780 781 static void emit_declaration(struct lp_build_tgsi_context *bld_base, 782 const struct tgsi_full_declaration *decl) 783 { 784 struct si_shader_context *ctx = si_shader_context(bld_base); 785 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 786 unsigned first, last, i; 787 switch(decl->Declaration.File) { 788 case TGSI_FILE_ADDRESS: 789 { 790 unsigned idx; 791 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { 792 unsigned chan; 793 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 794 ctx->addrs[idx][chan] = lp_build_alloca_undef( 795 &ctx->gallivm, 796 ctx->bld_base.uint_bld.elem_type, ""); 797 } 798 } 799 break; 800 } 801 802 case TGSI_FILE_TEMPORARY: 803 { 804 char name[16] = ""; 805 LLVMValueRef array_alloca = NULL; 806 unsigned decl_size; 807 unsigned writemask = decl->Declaration.UsageMask; 808 first = decl->Range.First; 809 last = decl->Range.Last; 810 decl_size = 4 * ((last - first) + 1); 811 812 if (decl->Declaration.Array) { 813 unsigned id = decl->Array.ArrayID - 1; 814 unsigned array_size; 815 816 writemask &= ctx->temp_arrays[id].writemask; 817 ctx->temp_arrays[id].writemask = writemask; 818 array_size = ((last - first) + 1) * util_bitcount(writemask); 819 820 /* If the array has more than 16 elements, store it 821 * in memory using an alloca that spans the entire 822 * array. 823 * 824 * Otherwise, store each array element individually. 
825 * We will then generate vectors (per-channel, up to 826 * <16 x float> if the usagemask is a single bit) for 827 * indirect addressing. 828 * 829 * Note that 16 is the number of vector elements that 830 * LLVM will store in a register, so theoretically an 831 * array with up to 4 * 16 = 64 elements could be 832 * handled this way, but whether that's a good idea 833 * depends on VGPR register pressure elsewhere. 834 * 835 * FIXME: We shouldn't need to have the non-alloca 836 * code path for arrays. LLVM should be smart enough to 837 * promote allocas into registers when profitable. 838 * 839 * LLVM 3.8 crashes with this. 840 */ 841 if (HAVE_LLVM >= 0x0309 && array_size > 16) { 842 array_alloca = LLVMBuildAlloca(builder, 843 LLVMArrayType(bld_base->base.vec_type, 844 array_size), "array"); 845 ctx->temp_array_allocas[id] = array_alloca; 846 } 847 } 848 849 if (!ctx->temps_count) { 850 ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1; 851 ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef)); 852 } 853 if (!array_alloca) { 854 for (i = 0; i < decl_size; ++i) { 855 #ifdef DEBUG 856 snprintf(name, sizeof(name), "TEMP%d.%c", 857 first + i / 4, "xyzw"[i % 4]); 858 #endif 859 ctx->temps[first * TGSI_NUM_CHANNELS + i] = 860 lp_build_alloca_undef(bld_base->base.gallivm, 861 bld_base->base.vec_type, 862 name); 863 } 864 } else { 865 LLVMValueRef idxs[2] = { 866 bld_base->uint_bld.zero, 867 NULL 868 }; 869 unsigned j = 0; 870 871 if (writemask != TGSI_WRITEMASK_XYZW && 872 !ctx->undef_alloca) { 873 /* Create a dummy alloca. We use it so that we 874 * have a pointer that is safe to load from if 875 * a shader ever reads from a channel that 876 * it never writes to. 
877 */ 878 ctx->undef_alloca = lp_build_alloca_undef( 879 bld_base->base.gallivm, 880 bld_base->base.vec_type, "undef"); 881 } 882 883 for (i = 0; i < decl_size; ++i) { 884 LLVMValueRef ptr; 885 if (writemask & (1 << (i % 4))) { 886 #ifdef DEBUG 887 snprintf(name, sizeof(name), "TEMP%d.%c", 888 first + i / 4, "xyzw"[i % 4]); 889 #endif 890 idxs[1] = lp_build_const_int32(bld_base->base.gallivm, j); 891 ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name); 892 j++; 893 } else { 894 ptr = ctx->undef_alloca; 895 } 896 ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr; 897 } 898 } 899 break; 900 } 901 case TGSI_FILE_INPUT: 902 { 903 unsigned idx; 904 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { 905 if (ctx->load_input && 906 ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) { 907 ctx->input_decls[idx] = *decl; 908 ctx->input_decls[idx].Range.First = idx; 909 ctx->input_decls[idx].Range.Last = idx; 910 ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First; 911 912 if (si_preload_fs_inputs(ctx) || 913 bld_base->info->processor != PIPE_SHADER_FRAGMENT) 914 ctx->load_input(ctx, idx, &ctx->input_decls[idx], 915 &ctx->inputs[idx * 4]); 916 } 917 } 918 } 919 break; 920 921 case TGSI_FILE_SYSTEM_VALUE: 922 { 923 unsigned idx; 924 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { 925 ctx->load_system_value(ctx, idx, decl); 926 } 927 } 928 break; 929 930 case TGSI_FILE_OUTPUT: 931 { 932 char name[16] = ""; 933 unsigned idx; 934 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { 935 unsigned chan; 936 assert(idx < RADEON_LLVM_MAX_OUTPUTS); 937 if (ctx->outputs[idx][0]) 938 continue; 939 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 940 #ifdef DEBUG 941 snprintf(name, sizeof(name), "OUT%d.%c", 942 idx, "xyzw"[chan % 4]); 943 #endif 944 ctx->outputs[idx][chan] = lp_build_alloca_undef( 945 &ctx->gallivm, 946 ctx->bld_base.base.elem_type, name); 947 } 948 } 949 break; 950 } 951 952 case TGSI_FILE_MEMORY: 953 
ctx->declare_memory_region(ctx, decl); 954 break; 955 956 default: 957 break; 958 } 959 } 960 961 LLVMValueRef si_llvm_saturate(struct lp_build_tgsi_context *bld_base, 962 LLVMValueRef value) 963 { 964 struct lp_build_emit_data clamp_emit_data; 965 966 memset(&clamp_emit_data, 0, sizeof(clamp_emit_data)); 967 clamp_emit_data.arg_count = 3; 968 clamp_emit_data.args[0] = value; 969 clamp_emit_data.args[2] = bld_base->base.one; 970 clamp_emit_data.args[1] = bld_base->base.zero; 971 972 return lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP, 973 &clamp_emit_data); 974 } 975 976 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base, 977 const struct tgsi_full_instruction *inst, 978 const struct tgsi_opcode_info *info, 979 LLVMValueRef dst[4]) 980 { 981 struct si_shader_context *ctx = si_shader_context(bld_base); 982 struct gallivm_state *gallivm = ctx->bld_base.base.gallivm; 983 const struct tgsi_full_dst_register *reg = &inst->Dst[0]; 984 LLVMBuilderRef builder = ctx->bld_base.base.gallivm->builder; 985 LLVMValueRef temp_ptr, temp_ptr2 = NULL; 986 unsigned chan, chan_index; 987 bool is_vec_store = false; 988 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode); 989 990 if (dst[0]) { 991 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0])); 992 is_vec_store = (k == LLVMVectorTypeKind); 993 } 994 995 if (is_vec_store) { 996 LLVMValueRef values[4] = {}; 997 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) { 998 LLVMValueRef index = lp_build_const_int32(gallivm, chan); 999 values[chan] = LLVMBuildExtractElement(gallivm->builder, 1000 dst[0], index, ""); 1001 } 1002 bld_base->emit_store(bld_base, inst, info, values); 1003 return; 1004 } 1005 1006 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1007 LLVMValueRef value = dst[chan_index]; 1008 1009 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3)) 1010 continue; 1011 if (inst->Instruction.Saturate) 1012 value = si_llvm_saturate(bld_base, value); 1013 1014 if 
(reg->Register.File == TGSI_FILE_ADDRESS) { 1015 temp_ptr = ctx->addrs[reg->Register.Index][chan_index]; 1016 LLVMBuildStore(builder, value, temp_ptr); 1017 continue; 1018 } 1019 1020 if (!tgsi_type_is_64bit(dtype)) 1021 value = bitcast(bld_base, TGSI_TYPE_FLOAT, value); 1022 1023 if (reg->Register.Indirect) { 1024 unsigned file = reg->Register.File; 1025 unsigned reg_index = reg->Register.Index; 1026 store_value_to_array(bld_base, value, file, chan_index, 1027 reg_index, ®->Indirect); 1028 } else { 1029 switch(reg->Register.File) { 1030 case TGSI_FILE_OUTPUT: 1031 temp_ptr = ctx->outputs[reg->Register.Index][chan_index]; 1032 if (tgsi_type_is_64bit(dtype)) 1033 temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1]; 1034 break; 1035 1036 case TGSI_FILE_TEMPORARY: 1037 { 1038 if (reg->Register.Index >= ctx->temps_count) 1039 continue; 1040 1041 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index]; 1042 if (tgsi_type_is_64bit(dtype)) 1043 temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1]; 1044 1045 break; 1046 } 1047 default: 1048 return; 1049 } 1050 if (!tgsi_type_is_64bit(dtype)) 1051 LLVMBuildStore(builder, value, temp_ptr); 1052 else { 1053 LLVMValueRef ptr = LLVMBuildBitCast(builder, value, 1054 LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), 2), ""); 1055 LLVMValueRef val2; 1056 value = LLVMBuildExtractElement(builder, ptr, 1057 bld_base->uint_bld.zero, ""); 1058 val2 = LLVMBuildExtractElement(builder, ptr, 1059 bld_base->uint_bld.one, ""); 1060 1061 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr); 1062 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2); 1063 } 1064 } 1065 } 1066 } 1067 1068 static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc) 1069 { 1070 char buf[32]; 1071 /* Subtract 1 so that the number shown is that of the corresponding 1072 * opcode in the TGSI dump, e.g. 
an if block has the same suffix as
	 * the instruction number of the corresponding TGSI IF.
	 */
	snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
	LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
}

/* Append a basic block at the level of the parent flow.
 *
 * If there is an enclosing flow construct, the block is inserted before the
 * parent's merge block so blocks stay in source order; otherwise it is
 * appended at the end of the main function.
 */
static LLVMBasicBlockRef append_basic_block(struct si_shader_context *ctx,
					    const char *name)
{
	struct gallivm_state *gallivm = &ctx->gallivm;

	assert(ctx->flow_depth >= 1);

	if (ctx->flow_depth >= 2) {
		/* flow_depth - 1 is the construct being built; its parent is
		 * at flow_depth - 2. */
		struct si_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];

		return LLVMInsertBasicBlockInContext(gallivm->context,
						     flow->next_block, name);
	}

	return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name);
}

/* Emit a branch to the given default target for the current block if
 * applicable -- that is, if the current block does not already contain a
 * branch from a break or continue.
 */
static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
{
	if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
		LLVMBuildBr(builder, target);
}

/* TGSI BGNLOOP: push a new flow entry, create the loop-entry and loop-exit
 * blocks, branch into the loop, and continue emitting at the loop entry. */
static void bgnloop_emit(const struct lp_build_tgsi_action *action,
			 struct lp_build_tgsi_context *bld_base,
			 struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct si_llvm_flow *flow = push_flow(ctx);
	flow->loop_entry_block = append_basic_block(ctx, "LOOP");
	flow->next_block = append_basic_block(ctx, "ENDLOOP");
	set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
	LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
	LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
}

/* TGSI BRK: unconditional branch to the innermost loop's exit block.
 * This terminates the current block; emit_default_branch() prevents a
 * duplicate terminator when the enclosing construct closes. */
static void brk_emit(const struct lp_build_tgsi_action *action,
		     struct lp_build_tgsi_context *bld_base,
		     struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct si_llvm_flow *flow = get_innermost_loop(ctx);

	LLVMBuildBr(gallivm->builder, flow->next_block);
}

/* TGSI CONT: unconditional branch back to the innermost loop's entry. */
static void cont_emit(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct si_llvm_flow *flow = get_innermost_loop(ctx);

	LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
}

/* TGSI ELSE: close the then-side with a branch to a fresh ENDIF block,
 * move emission into the else block (the branch's old merge target), and
 * make ENDIF the construct's new merge block. */
static void else_emit(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct si_llvm_flow *current_branch = get_current_flow(ctx);
	LLVMBasicBlockRef endif_block;

	/* Must be an if/else construct, not a loop. */
	assert(!current_branch->loop_entry_block);

	endif_block = append_basic_block(ctx, "ENDIF");
	emit_default_branch(gallivm->builder, endif_block);

	LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
	set_basicblock_name(current_branch->next_block, "else", bld_base->pc);

	current_branch->next_block = endif_block;
}

/* TGSI ENDIF: fall through into the merge block and pop the flow stack. */
static void endif_emit(const struct lp_build_tgsi_action *action,
		       struct lp_build_tgsi_context *bld_base,
		       struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct si_llvm_flow *current_branch = get_current_flow(ctx);

	assert(!current_branch->loop_entry_block);

	emit_default_branch(gallivm->builder, current_branch->next_block);
	LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
	set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);

	ctx->flow_depth--;
}

/* TGSI ENDLOOP: emit the back edge to the loop entry (unless a BRK/CONT
 * already terminated the block), continue at the exit block, and pop. */
static void endloop_emit(const struct lp_build_tgsi_action *action,
			 struct lp_build_tgsi_context *bld_base,
			 struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct si_llvm_flow *current_loop = get_current_flow(ctx);

	assert(current_loop->loop_entry_block);

	emit_default_branch(gallivm->builder, current_loop->loop_entry_block);

	LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
	set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
	ctx->flow_depth--;
}

/* Shared tail of IF/UIF: push a flow entry, create the IF and ELSE blocks,
 * emit the conditional branch, and continue emitting inside the IF block.
 * The ELSE block doubles as the merge target until else_emit() runs. */
static void if_cond_emit(const struct lp_build_tgsi_action *action,
			 struct lp_build_tgsi_context *bld_base,
			 struct lp_build_emit_data *emit_data,
			 LLVMValueRef cond)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct si_llvm_flow *flow = push_flow(ctx);
	LLVMBasicBlockRef if_block;

	if_block = append_basic_block(ctx, "IF");
	flow->next_block = append_basic_block(ctx, "ELSE");
	set_basicblock_name(if_block, "if", bld_base->pc);
	LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
	LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
}

/* TGSI IF: float condition.  LLVMRealUNE is unordered-not-equal, so NaN
 * compares as "not zero" and takes the then-branch. */
static void if_emit(const struct lp_build_tgsi_action *action,
		    struct lp_build_tgsi_context *bld_base,
		    struct lp_build_emit_data *emit_data)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMValueRef cond;

	cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
			     emit_data->args[0],
			     bld_base->base.zero, "");

	if_cond_emit(action, bld_base, emit_data, cond);
}

/* TGSI UIF: integer condition — bitcast to unsigned and compare != 0. */
static void uif_emit(const struct lp_build_tgsi_action *action,
		     struct lp_build_tgsi_context *bld_base,
		     struct lp_build_emit_data *emit_data)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMValueRef cond;

	cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
			     bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
			     bld_base->int_bld.zero, "");

	if_cond_emit(action, bld_base, emit_data, cond);
}

/* Record a TGSI immediate: all four channels are stored as i32 constants in
 * ctx->imms.  Capacity was sized in si_llvm_context_init from
 * info->file_max[TGSI_FILE_IMMEDIATE]; no overflow check here — the scan
 * info is assumed to match the token stream. */
static void emit_immediate(struct lp_build_tgsi_context *bld_base,
			   const struct tgsi_full_immediate *imm)
{
	unsigned i;
	struct si_shader_context *ctx = si_shader_context(bld_base);

	for (i = 0; i < 4; ++i) {
		ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
			LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false);
	}

	ctx->imms_num++;
}

/* Set up a fresh si_shader_context: LLVM context/module/builder, ac context,
 * gallivm build contexts for all scalar types, temp-array and immediate
 * storage, and the TGSI emit/fetch/opcode action tables. */
void si_llvm_context_init(struct si_shader_context *ctx,
			  struct si_screen *sscreen,
			  struct si_shader *shader,
			  LLVMTargetMachineRef tm,
			  const struct tgsi_shader_info *info,
			  const struct tgsi_token *tokens)
{
	struct lp_type type;

	/* Initialize the gallivm object:
	 * We are only using the module, context, and builder fields of this struct.
	 * This should be enough for us to be able to pass our gallivm struct to the
	 * helper functions in the gallivm module.
	 */
	memset(ctx, 0, sizeof(*ctx));
	ctx->shader = shader;
	ctx->screen = sscreen;
	ctx->tm = tm;
	/* -1 marks "no TGSI info" (e.g. non-TGSI shader parts). */
	ctx->type = info ? info->processor : -1;

	ctx->gallivm.context = LLVMContextCreate();
	ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
						ctx->gallivm.context);
	LLVMSetTarget(ctx->gallivm.module, "amdgcn--");

	bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0;
	ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context,
						 unsafe_fpmath);

	/* The ac helpers share the same module/builder. */
	ac_llvm_context_init(&ctx->ac, ctx->gallivm.context);
	ctx->ac.module = ctx->gallivm.module;
	ctx->ac.builder = ctx->gallivm.builder;

	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;

	bld_base->info = info;

	/* Allocate bookkeeping for indexable temp arrays, if any. */
	if (info && info->array_max[TGSI_FILE_TEMPORARY] > 0) {
		int size = info->array_max[TGSI_FILE_TEMPORARY];

		ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
		ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));

		if (tokens)
			tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
					 ctx->temp_arrays);
	}

	/* Storage for immediates: 4 channels per declared immediate. */
	if (info && info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
		int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
		ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
	}

	/* Scalar f32 base type; derived int/uint and 64-bit variants below. */
	type.floating = true;
	type.fixed = false;
	type.sign = true;
	type.norm = false;
	type.width = 32;
	type.length = 1;

	lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
	lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
	lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
	type.width *= 2;
	lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
	lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
	lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));

	bld_base->soa = 1;
	bld_base->emit_store = si_llvm_emit_store;
	bld_base->emit_swizzle = emit_swizzle;
	bld_base->emit_declaration = emit_declaration;
	bld_base->emit_immediate = emit_immediate;

	bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
	bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
	bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
	bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
	bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;

	/* metadata allowing 2.5 ULP */
	ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
						       "fpmath", 6);
	LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
	ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
						     &arg, 1);

	/* Control-flow opcodes handled by the emitters above. */
	bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
	bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
	bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
	bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
	bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
	bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;

	si_shader_context_init_alu(&ctx->bld_base);

	/* Commonly used LLVM types, cached for the emitters. */
	ctx->voidt = LLVMVoidTypeInContext(ctx->gallivm.context);
	ctx->i1 = LLVMInt1TypeInContext(ctx->gallivm.context);
	ctx->i8 = LLVMInt8TypeInContext(ctx->gallivm.context);
	ctx->i32 = LLVMInt32TypeInContext(ctx->gallivm.context);
	ctx->i64 = LLVMInt64TypeInContext(ctx->gallivm.context);
	ctx->i128 = LLVMIntTypeInContext(ctx->gallivm.context, 128);
	ctx->f32 = LLVMFloatTypeInContext(ctx->gallivm.context);
	ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
}

/* Create the shader's main LLVM function and position the builder in its
 * entry block.  A non-empty return_types list produces a packed struct
 * return type; otherwise the function returns void. */
void si_llvm_create_func(struct si_shader_context *ctx,
			 const char *name,
			 LLVMTypeRef *return_types, unsigned num_return_elems,
			 LLVMTypeRef *ParamTypes, unsigned ParamCount)
{
	LLVMTypeRef main_fn_type, ret_type;
	LLVMBasicBlockRef main_fn_body;

	if (num_return_elems)
		ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
						   return_types,
						   num_return_elems, true);
	else
		ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);

	/* Setup the function */
	ctx->return_type = ret_type;
	main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
	ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
	main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
			ctx->main_fn, "main_body");
	LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
}

/* Run the module-level optimization pipeline over the generated IR, then
 * release the builder, pass manager, and target library info. */
void si_llvm_finalize_module(struct si_shader_context *ctx,
			     bool run_verifier)
{
	struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
	const char *triple = LLVMGetTarget(gallivm->module);
	LLVMTargetLibraryInfoRef target_library_info;

	/* Create the pass manager */
	gallivm->passmgr = LLVMCreatePassManager();

	target_library_info = gallivm_create_target_library_info(triple);
	LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);

	if (run_verifier)
		LLVMAddVerifierPass(gallivm->passmgr);

	LLVMAddAlwaysInlinerPass(gallivm->passmgr);

	/* This pass should eliminate all the load and store instructions */
	LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);

	/* Add some optimization passes */
	LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
	LLVMAddLICMPass(gallivm->passmgr);
	LLVMAddAggressiveDCEPass(gallivm->passmgr);
	LLVMAddCFGSimplificationPass(gallivm->passmgr);
	LLVMAddInstructionCombiningPass(gallivm->passmgr);

	/* Run the pass */
	LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);

	/* The builder is no longer needed once the IR is final. */
	LLVMDisposeBuilder(gallivm->builder);
	LLVMDisposePassManager(gallivm->passmgr);
	gallivm_dispose_target_library_info(target_library_info);
}

/* Free everything si_llvm_context_init allocated.  The module must be
 * disposed before its owning LLVM context.  Pointers are NULLed and counts
 * zeroed so the context can be reused safely. */
void si_llvm_dispose(struct si_shader_context *ctx)
{
	LLVMDisposeModule(ctx->bld_base.base.gallivm->module);
	LLVMContextDispose(ctx->bld_base.base.gallivm->context);
	FREE(ctx->temp_arrays);
	ctx->temp_arrays = NULL;
	FREE(ctx->temp_array_allocas);
	ctx->temp_array_allocas = NULL;
	FREE(ctx->temps);
	ctx->temps = NULL;
	ctx->temps_count = 0;
	FREE(ctx->imms);
	ctx->imms = NULL;
	ctx->imms_num = 0;
	FREE(ctx->flow);
	ctx->flow = NULL;
	ctx->flow_depth_max = 0;
}