1 /* 2 * Copyright 2016 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 
 */

#include "si_shader_internal.h"
#include "si_pipe.h"

#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_gather.h"
#include "gallivm/lp_bld_flow.h"
#include "gallivm/lp_bld_init.h"
#include "gallivm/lp_bld_intr.h"
#include "gallivm/lp_bld_misc.h"
#include "gallivm/lp_bld_swizzle.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_debug.h"

#include <stdio.h>
#include <llvm-c/Transforms/IPO.h>
#include <llvm-c/Transforms/Scalar.h>

/* Calling-convention IDs understood by the LLVM AMDGPU backend.
 * NOTE(review): the numeric values must match the backend's definitions —
 * confirm against the LLVM version this is built with. */
enum si_llvm_calling_convention {
	RADEON_LLVM_AMDGPU_VS = 87,
	RADEON_LLVM_AMDGPU_GS = 88,
	RADEON_LLVM_AMDGPU_PS = 89,
	RADEON_LLVM_AMDGPU_CS = 90,
	RADEON_LLVM_AMDGPU_HS = 93,
};

/* Attach an integer-valued, target-dependent attribute \p name=\p value
 * to function \p F. */
void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
{
	char str[16];

	snprintf(str, sizeof(str), "%i", value);
	LLVMAddTargetDependentFunctionAttr(F, name, str);
}

/* State shared with the LLVM diagnostic handler during one compilation. */
struct si_llvm_diagnostics {
	struct pipe_debug_callback *debug; /* where LLVM messages are forwarded */
	unsigned retval;                   /* set to 1 when an error diagnostic is seen */
};

/* LLVM diagnostic callback: forward every message to the pipe debug
 * callback and record whether an error occurred. */
static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
{
	struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
	LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
	char *description = LLVMGetDiagInfoDescription(di);
	const char *severity_str = NULL;

	switch (severity) {
	case LLVMDSError:
		severity_str = "error";
		break;
	case LLVMDSWarning:
		severity_str = "warning";
		break;
	case LLVMDSRemark:
		severity_str = "remark";
		break;
	case LLVMDSNote:
		severity_str = "note";
		break;
	default:
		severity_str = "unknown";
	}

	pipe_debug_message(diag->debug, SHADER_INFO,
			   "LLVM diagnostic (%s): %s", severity_str, description);

	if (severity == LLVMDSError) {
		diag->retval = 1;
		fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
	}

	/* The description string is owned by LLVM; release it with the
	 * matching LLVM API, not free(). */
	LLVMDisposeMessage(description);
}

/**
 * Compile an LLVM module to machine code.
 *
 * @returns 0 for success, 1 for failure
 */
unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
			 LLVMTargetMachineRef tm,
			 struct pipe_debug_callback *debug)
{
	struct si_llvm_diagnostics diag;
	char *err;
	LLVMContextRef llvm_ctx;
	LLVMMemoryBufferRef out_buffer;
	unsigned buffer_size;
	const char *buffer_data;
	LLVMBool mem_err;

	diag.debug = debug;
	diag.retval = 0;

	/* Setup Diagnostic Handler*/
	llvm_ctx = LLVMGetModuleContext(M);

	LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);

	/* Compile IR*/
	mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
						      &out_buffer);

	/* Process Errors/Warnings */
	if (mem_err) {
		fprintf(stderr, "%s: %s", __FUNCTION__, err);
		pipe_debug_message(debug, SHADER_INFO,
				   "LLVM emit error: %s", err);
		FREE(err);
		diag.retval = 1;
		goto out;
	}

	/* Extract Shader Code*/
	buffer_size = LLVMGetBufferSize(out_buffer);
	buffer_data = LLVMGetBufferStart(out_buffer);

	/* Parse the ELF object LLVM produced into the shader binary. */
	if (!ac_elf_read(buffer_data, buffer_size, binary)) {
		fprintf(stderr, "radeonsi: cannot read an ELF shader binary\n");
		diag.retval = 1;
	}

	/* Clean up */
	LLVMDisposeMemoryBuffer(out_buffer);

out:
	if (diag.retval != 0)
		pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
	return diag.retval;
}

/* Map a TGSI opcode type to the matching LLVM type, or 0 (NULL) when the
 * TGSI type has no dedicated LLVM representation. */
LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
			  enum tgsi_opcode_type type)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);

	switch (type) {
	case TGSI_TYPE_UNSIGNED:
	case TGSI_TYPE_SIGNED:
		return ctx->ac.i32;
	case TGSI_TYPE_UNSIGNED64:
	case TGSI_TYPE_SIGNED64:
		return ctx->ac.i64;
	case TGSI_TYPE_DOUBLE:
		return ctx->ac.f64;
	case TGSI_TYPE_UNTYPED:
	case TGSI_TYPE_FLOAT:
		return ctx->ac.f32;
	default: break;
	}
	return 0;
}

/* Bitcast \p value to the LLVM type for \p type; passes the value through
 * unchanged when tgsi2llvmtype has no mapping for the type. */
LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
		     enum tgsi_opcode_type type, LLVMValueRef value)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);

	if (dst_type)
		return LLVMBuildBitCast(ctx->ac.builder, value, dst_type, "");
	else
		return value;
}

/**
 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
 * or an undefined value in the same interval otherwise.
 */
LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
				 LLVMValueRef index,
				 unsigned num)
{
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
	LLVMValueRef cc;

	if (util_is_power_of_two(num)) {
		/* Power-of-two range: a mask keeps the index in bounds. */
		index = LLVMBuildAnd(builder, index, c_max, "");
	} else {
		/* In theory, this MAX pattern should result in code that is
		 * as good as the bit-wise AND above.
		 *
		 * In practice, LLVM generates worse code (at the time of
		 * writing), because its value tracking is not strong enough.
		 */
		cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
		index = LLVMBuildSelect(builder, cc, index, c_max, "");
	}

	return index;
}

/* Emit a 4-component swizzle of \p value as a shufflevector against undef. */
static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
				 LLVMValueRef value,
				 unsigned swizzle_x,
				 unsigned swizzle_y,
				 unsigned swizzle_z,
				 unsigned swizzle_w)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMValueRef swizzles[4];

	swizzles[0] = LLVMConstInt(ctx->i32, swizzle_x, 0);
	swizzles[1] = LLVMConstInt(ctx->i32, swizzle_y, 0);
	swizzles[2] = LLVMConstInt(ctx->i32, swizzle_z, 0);
	swizzles[3] = LLVMConstInt(ctx->i32, swizzle_w, 0);

	return LLVMBuildShuffleVector(ctx->ac.builder,
				      value,
				      LLVMGetUndef(LLVMTypeOf(value)),
				      LLVMConstVector(swizzles, 4), "");
}

/**
 * Return the description of the array covering the given temporary register
 * index.
 */
static unsigned
get_temp_array_id(struct lp_build_tgsi_context *bld_base,
		  unsigned reg_index,
		  const struct tgsi_ind_register *reg)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
	unsigned i;

	/* Prefer the explicit ArrayID on the indirect register when valid. */
	if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
		return reg->ArrayID;

	/* Otherwise search the declared ranges. Returned IDs are 1-based;
	 * 0 means the register is not covered by any array. */
	for (i = 0; i < num_arrays; i++) {
		const struct tgsi_array_info *array = &ctx->temp_arrays[i];

		if (reg_index >= array->range.First && reg_index <= array->range.Last)
			return i + 1;
	}

	return 0;
}

/* Return the register range an indirect access may touch: the declared
 * array range for temporaries when one matches, otherwise the whole file. */
static struct tgsi_declaration_range
get_array_range(struct lp_build_tgsi_context *bld_base,
		unsigned File, unsigned reg_index,
		const struct tgsi_ind_register *reg)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct tgsi_declaration_range range;

	if (File == TGSI_FILE_TEMPORARY) {
		unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
		if (array_id)
			return ctx->temp_arrays[array_id - 1].range;
	}

	range.First = 0;
	range.Last = bld_base->info->file_max[File];
	return range;
}

/**
 * For indirect registers, construct a pointer directly to the requested
 * element using getelementptr if possible.
 *
 * Returns NULL if the insertelement/extractelement fallback for array access
 * must be used.
 */
static LLVMValueRef
get_pointer_into_array(struct si_shader_context *ctx,
		       unsigned file,
		       unsigned swizzle,
		       unsigned reg_index,
		       const struct tgsi_ind_register *reg_indirect)
{
	unsigned array_id;
	struct tgsi_array_info *array;
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef idxs[2];
	LLVMValueRef index;
	LLVMValueRef alloca;

	if (file != TGSI_FILE_TEMPORARY)
		return NULL;

	array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
	if (!array_id)
		return NULL;

	/* Only arrays backed by a spanning alloca take this fast path. */
	alloca = ctx->temp_array_allocas[array_id - 1];
	if (!alloca)
		return NULL;

	array = &ctx->temp_arrays[array_id - 1];

	/* Channels the shader never writes are not stored in the alloca;
	 * reads of them go to the shared dummy "undef" alloca. */
	if (!(array->writemask & (1 << swizzle)))
		return ctx->undef_alloca;

	index = si_get_indirect_index(ctx, reg_indirect, 1,
				      reg_index - ctx->temp_arrays[array_id - 1].range.First);

	/* Ensure that the index is within a valid range, to guard against
	 * VM faults and overwriting critical data (e.g. spilled resource
	 * descriptors).
	 *
	 * TODO It should be possible to avoid the additional instructions
	 * if LLVM is changed so that it guarantuees:
	 * 1. the scratch space descriptor isolates the current wave (this
	 *    could even save the scratch offset SGPR at the cost of an
	 *    additional SALU instruction)
	 * 2. the memory for allocas must be allocated at the _end_ of the
	 *    scratch space (after spilled registers)
	 */
	index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);

	/* The alloca stores only written channels, packed: scale by the
	 * popcount of the writemask, then add this channel's offset within
	 * the written set. */
	index = LLVMBuildMul(
		builder, index,
		LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
		"");
	index = LLVMBuildAdd(
		builder, index,
		LLVMConstInt(ctx->i32,
			     util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0),
		"");
	idxs[0] = ctx->i32_0;
	idxs[1] = index;
	return LLVMBuildGEP(ctx->ac.builder, alloca, idxs, 2, "");
}

/* Pack two 32-bit values (\p ptr = low half, \p ptr2 = high half) into a
 * <2 x i32> and bitcast it to the 64-bit LLVM \p type. */
LLVMValueRef
si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
			 LLVMTypeRef type,
			 LLVMValueRef ptr,
			 LLVMValueRef ptr2)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMValueRef result;

	result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));

	result = LLVMBuildInsertElement(ctx->ac.builder,
					result,
					ac_to_integer(&ctx->ac, ptr),
					ctx->i32_0, "");
	result = LLVMBuildInsertElement(ctx->ac.builder,
					result,
					ac_to_integer(&ctx->ac, ptr2),
					ctx->i32_1, "");
	return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
}

/* Fetch one channel of every register in \p range and gather the values
 * into one LLVM vector — the basis of the extract/insert-element fallback
 * for indirect addressing. */
static LLVMValueRef
emit_array_fetch(struct lp_build_tgsi_context *bld_base,
		 unsigned File, enum tgsi_opcode_type type,
		 struct tgsi_declaration_range range,
		 unsigned swizzle)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	unsigned i, size = range.Last - range.First + 1;
	LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
	LLVMValueRef result = LLVMGetUndef(vec);

	struct tgsi_full_src_register tmp_reg = {};
	tmp_reg.Register.File = File;

	for (i = 0; i < size; ++i) {
		tmp_reg.Register.Index = i + range.First;
		LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
		result = LLVMBuildInsertElement(ctx->ac.builder, result, temp,
						LLVMConstInt(ctx->i32, i, 0), "array_vector");
	}
	return result;
}

/* Load one channel of an indirectly-addressed register: via a direct GEP
 * into the backing alloca when possible, else via the vector fallback. */
static LLVMValueRef
load_value_from_array(struct lp_build_tgsi_context *bld_base,
		      unsigned file,
		      enum tgsi_opcode_type type,
		      unsigned swizzle,
		      unsigned reg_index,
		      const struct tgsi_ind_register *reg_indirect)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef ptr;

	ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
	if (ptr) {
		LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
		if (tgsi_type_is_64bit(type)) {
			/* 64-bit values occupy two consecutive 32-bit slots. */
			LLVMValueRef ptr_hi, val_hi;
			ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
			val_hi = LLVMBuildLoad(builder, ptr_hi, "");
			val = si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
						       val, val_hi);
		}

		return val;
	} else {
		/* Fallback: materialize the whole range as a vector and
		 * extract the dynamically-indexed element. */
		struct tgsi_declaration_range range =
			get_array_range(bld_base, file, reg_index, reg_indirect);
		LLVMValueRef index =
			si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
		LLVMValueRef array =
			emit_array_fetch(bld_base, file, type, range, swizzle);
		return LLVMBuildExtractElement(builder, array, index, "");
	}
}

/* Store one channel of an indirectly-addressed register; counterpart of
 * load_value_from_array. */
static void
store_value_to_array(struct lp_build_tgsi_context *bld_base,
		     LLVMValueRef value,
		     unsigned file,
		     unsigned chan_index,
		     unsigned reg_index,
		     const struct tgsi_ind_register *reg_indirect)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef ptr;

	ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
	if (ptr) {
		LLVMBuildStore(builder, value, ptr);
	} else {
		/* Fallback: fetch the whole range as a vector, insert the
		 * new element at the dynamic index, write every element back. */
		unsigned i, size;
		struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
		LLVMValueRef index = si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
		LLVMValueRef array =
			emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
		LLVMValueRef temp_ptr;

		array = LLVMBuildInsertElement(builder, array, value, index, "");

		size = range.Last - range.First + 1;
		for (i = 0; i < size; ++i) {
			switch(file) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = ctx->outputs[i + range.First][chan_index];
				break;

			case TGSI_FILE_TEMPORARY:
				if (range.First + i >= ctx->temps_count)
					continue;
				temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
				break;

			default:
				continue;
			}
			value = LLVMBuildExtractElement(builder, array,
							LLVMConstInt(ctx->i32, i, 0), "");
			LLVMBuildStore(builder, value, temp_ptr);
		}
	}
}

/* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
 * reload them at each use. This must be true if the shader is using
 * derivatives and KILL, because KILL can leave the WQM and then a lazy
 * input load isn't in the WQM anymore.
 */
static bool si_preload_fs_inputs(struct si_shader_context *ctx)
{
	struct si_shader_selector *sel = ctx->shader->selector;

	return sel->info.uses_derivatives &&
	       sel->info.uses_kill;
}

/* Return the alloca backing output register \p index, channel \p chan. */
static LLVMValueRef
get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
	       unsigned chan)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);

	assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
	return ctx->outputs[index][chan];
}

/* Fetch one channel of a TGSI source operand (swizzle == ~0 fetches all
 * four channels as a vector), bitcast to the requested type. */
LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
				const struct tgsi_full_src_register *reg,
				enum tgsi_opcode_type type,
				unsigned swizzle)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef result = NULL, ptr, ptr2;

	if (swizzle == ~0) {
		/* Fetch each channel recursively and gather into a vector. */
		LLVMValueRef values[TGSI_NUM_CHANNELS];
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
		}
		return lp_build_gather_values(&ctx->gallivm, values,
					      TGSI_NUM_CHANNELS);
	}

	if (reg->Register.Indirect) {
		LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
							  swizzle, reg->Register.Index, &reg->Indirect);
		return bitcast(bld_base, type, load);
	}

	switch(reg->Register.File) {
	case TGSI_FILE_IMMEDIATE: {
		LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
		if (tgsi_type_is_64bit(type)) {
			/* Pair two consecutive 32-bit immediates into one
			 * 64-bit constant. */
			result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
			result = LLVMConstInsertElement(result,
							ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
							ctx->i32_0);
			result = LLVMConstInsertElement(result,
							ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
							ctx->i32_1);
			return LLVMConstBitCast(result, ctype);
		} else {
			return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
		}
	}

	case TGSI_FILE_INPUT: {
		unsigned index = reg->Register.Index;
		LLVMValueRef input[4];

		/* I don't think doing this for vertex shaders is beneficial.
		 * For those, we want to make sure the VMEM loads are executed
		 * only once. Fragment shaders don't care much, because
		 * v_interp instructions are much cheaper than VMEM loads.
		 */
		if (!si_preload_fs_inputs(ctx) &&
		    ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
			ctx->load_input(ctx, index, &ctx->input_decls[index], input);
		else
			memcpy(input, &ctx->inputs[index * 4], sizeof(input));

		result = input[swizzle];

		if (tgsi_type_is_64bit(type)) {
			ptr = result;
			ptr2 = input[swizzle + 1];
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							ptr, ptr2);
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
		if (reg->Register.Index >= ctx->temps_count)
			return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
		ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
		if (tgsi_type_is_64bit(type)) {
			ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							LLVMBuildLoad(builder, ptr, ""),
							LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	case TGSI_FILE_OUTPUT:
		ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
		if (tgsi_type_is_64bit(type)) {
			ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							LLVMBuildLoad(builder, ptr, ""),
							LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	default:
		return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
	}

	return bitcast(bld_base, type, result);
}

/* Fetch one channel of a system value from ctx->system_values. */
static LLVMValueRef
fetch_system_value(struct lp_build_tgsi_context *bld_base,
		   const struct tgsi_full_src_register *reg,
		   enum tgsi_opcode_type type,
		   unsigned swizzle)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef cval = ctx->system_values[reg->Register.Index];

	if (tgsi_type_is_64bit(type)) {
		LLVMValueRef lo, hi;

		assert(swizzle == 0 || swizzle == 2);

		lo = LLVMBuildExtractElement(
			builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
		hi = LLVMBuildExtractElement(
			builder, cval, LLVMConstInt(ctx->i32, swizzle + 1, 0), "");

		return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
						lo, hi);
	}

	if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
		cval = LLVMBuildExtractElement(
			builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
	} else {
		/* Scalar system value: only channel 0 is meaningful. */
		assert(swizzle == 0);
	}

	return bitcast(bld_base, type, cval);
}

/* Allocate backing storage (allocas) for a TGSI declaration and, for
 * inputs and system values, trigger the load callbacks. */
static void emit_declaration(struct lp_build_tgsi_context *bld_base,
			     const struct tgsi_full_declaration *decl)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	unsigned first, last, i;
	switch(decl->Declaration.File) {
	case TGSI_FILE_ADDRESS:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
				ctx->addrs[idx][chan] = lp_build_alloca_undef(
					&ctx->gallivm,
					ctx->i32, "");
			}
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
	{
		char name[16] = "";
		LLVMValueRef array_alloca = NULL;
		unsigned decl_size;
		unsigned writemask = decl->Declaration.UsageMask;
		first = decl->Range.First;
		last = decl->Range.Last;
		decl_size = 4 * ((last - first) + 1);

		if (decl->Declaration.Array) {
			unsigned id = decl->Array.ArrayID - 1;
			unsigned array_size;

			writemask &= ctx->temp_arrays[id].writemask;
			ctx->temp_arrays[id].writemask = writemask;
			array_size = ((last - first) + 1) * util_bitcount(writemask);

			/* If the array has more than 16 elements, store it
			 * in memory using an alloca that spans the entire
			 * array.
			 *
			 * Otherwise, store each array element individually.
			 * We will then generate vectors (per-channel, up to
			 * <16 x float> if the usagemask is a single bit) for
			 * indirect addressing.
			 *
			 * Note that 16 is the number of vector elements that
			 * LLVM will store in a register, so theoretically an
			 * array with up to 4 * 16 = 64 elements could be
			 * handled this way, but whether that's a good idea
			 * depends on VGPR register pressure elsewhere.
			 *
			 * FIXME: We shouldn't need to have the non-alloca
			 * code path for arrays. LLVM should be smart enough to
			 * promote allocas into registers when profitable.
			 */
			if (array_size > 16 ||
			    !ctx->screen->llvm_has_working_vgpr_indexing) {
				array_alloca = lp_build_alloca_undef(&ctx->gallivm,
								     LLVMArrayType(ctx->f32,
										   array_size), "array");
				ctx->temp_array_allocas[id] = array_alloca;
			}
		}

		/* Lazily allocate the flat per-channel temp pointer table. */
		if (!ctx->temps_count) {
			ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
			ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
		}
		if (!array_alloca) {
			/* One scalar alloca per channel. */
			for (i = 0; i < decl_size; ++i) {
#ifdef DEBUG
				snprintf(name, sizeof(name), "TEMP%d.%c",
					 first + i / 4, "xyzw"[i % 4]);
#endif
				ctx->temps[first * TGSI_NUM_CHANNELS + i] =
					lp_build_alloca_undef(&ctx->gallivm,
							      ctx->f32,
							      name);
			}
		} else {
			LLVMValueRef idxs[2] = {
				ctx->i32_0,
				NULL
			};
			unsigned j = 0;

			if (writemask != TGSI_WRITEMASK_XYZW &&
			    !ctx->undef_alloca) {
				/* Create a dummy alloca. We use it so that we
				 * have a pointer that is safe to load from if
				 * a shader ever reads from a channel that
				 * it never writes to.
				 */
				ctx->undef_alloca = lp_build_alloca_undef(
					&ctx->gallivm,
					ctx->f32, "undef");
			}

			/* Point written channels into the packed array
			 * alloca; unwritten channels share undef_alloca. */
			for (i = 0; i < decl_size; ++i) {
				LLVMValueRef ptr;
				if (writemask & (1 << (i % 4))) {
#ifdef DEBUG
					snprintf(name, sizeof(name), "TEMP%d.%c",
						 first + i / 4, "xyzw"[i % 4]);
#endif
					idxs[1] = LLVMConstInt(ctx->i32, j, 0);
					ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
					j++;
				} else {
					ptr = ctx->undef_alloca;
				}
				ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
			}
		}
		break;
	}
	case TGSI_FILE_INPUT:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			if (ctx->load_input &&
			    ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
				/* Store a per-register copy of the declaration
				 * so lazy loads can replay it later. */
				ctx->input_decls[idx] = *decl;
				ctx->input_decls[idx].Range.First = idx;
				ctx->input_decls[idx].Range.Last = idx;
				ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;

				if (si_preload_fs_inputs(ctx) ||
				    bld_base->info->processor != PIPE_SHADER_FRAGMENT)
					ctx->load_input(ctx, idx, &ctx->input_decls[idx],
							&ctx->inputs[idx * 4]);
			}
		}
	}
	break;

	case TGSI_FILE_SYSTEM_VALUE:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			si_load_system_value(ctx, idx, decl);
		}
	}
	break;

	case TGSI_FILE_OUTPUT:
	{
		char name[16] = "";
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			assert(idx < RADEON_LLVM_MAX_OUTPUTS);
			if (ctx->outputs[idx][0])
				continue;
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
#ifdef DEBUG
				snprintf(name, sizeof(name), "OUT%d.%c",
					 idx, "xyzw"[chan % 4]);
#endif
				ctx->outputs[idx][chan] = lp_build_alloca_undef(
					&ctx->gallivm,
					ctx->f32, name);
			}
		}
		break;
	}

	case TGSI_FILE_MEMORY:
		si_declare_compute_memory(ctx, decl);
		break;

	default:
		break;
	}
}

/* Store to a TGSI destination operand, honoring writemask, saturate,
 * indirect addressing, and 64-bit channel pairs. */
void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
			const struct tgsi_full_instruction *inst,
			const struct tgsi_opcode_info *info,
			unsigned index,
			LLVMValueRef dst[4])
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	const struct tgsi_full_dst_register *reg = &inst->Dst[index];
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef temp_ptr, temp_ptr2 = NULL;
	bool is_vec_store = false;
	enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);

	if (dst[0]) {
		LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
		is_vec_store = (k == LLVMVectorTypeKind);
	}

	if (is_vec_store) {
		/* Split the vector into scalars and recurse through the
		 * registered store callback. */
		LLVMValueRef values[4] = {};
		uint32_t writemask = reg->Register.WriteMask;
		while (writemask) {
			unsigned chan = u_bit_scan(&writemask);
			LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
			values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
							       dst[0], index, "");
		}
		bld_base->emit_store(bld_base, inst, info, index, values);
		return;
	}

	uint32_t writemask = reg->Register.WriteMask;
	while (writemask) {
		unsigned chan_index = u_bit_scan(&writemask);
		LLVMValueRef value = dst[chan_index];

		/* 64-bit values occupy channel pairs; the odd channels are
		 * the high halves and are written together with the even. */
		if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
			continue;
		if (inst->Instruction.Saturate)
			value = ac_build_clamp(&ctx->ac, value);

		if (reg->Register.File == TGSI_FILE_ADDRESS) {
			temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
			LLVMBuildStore(builder, value, temp_ptr);
			continue;
		}

		if (!tgsi_type_is_64bit(dtype))
			value = ac_to_float(&ctx->ac, value);

		if (reg->Register.Indirect) {
			unsigned file = reg->Register.File;
			unsigned reg_index = reg->Register.Index;
			store_value_to_array(bld_base, value, file, chan_index,
					     reg_index, &reg->Indirect);
		} else {
			switch(reg->Register.File) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
				if (tgsi_type_is_64bit(dtype))
					temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
				break;

			case TGSI_FILE_TEMPORARY:
			{
				if (reg->Register.Index >= ctx->temps_count)
					continue;

				temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
				if (tgsi_type_is_64bit(dtype))
					temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];

				break;
			}
			default:
				return;
			}
			if (!tgsi_type_is_64bit(dtype))
				LLVMBuildStore(builder, value, temp_ptr);
			else {
				/* Split the 64-bit value into two 32-bit
				 * halves and store each to its channel. */
				LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
								    LLVMVectorType(ctx->i32, 2), "");
				LLVMValueRef val2;
				value = LLVMBuildExtractElement(builder, ptr,
								ctx->i32_0, "");
				val2 = LLVMBuildExtractElement(builder, ptr,
							       ctx->i32_1, "");

				LLVMBuildStore(builder, ac_to_float(&ctx->ac, value), temp_ptr);
				LLVMBuildStore(builder, ac_to_float(&ctx->ac, val2), temp_ptr2);
			}
		}
	}
}

static int get_line(int pc)
{
	/* Subtract 1 so that the number shown is that of the corresponding
	 * opcode in the TGSI dump, e.g. an if block has the same suffix as
	 * the instruction number of the corresponding TGSI IF.
	 */
	return pc - 1;
}

/* TGSI control-flow opcode emitters: thin wrappers over the ac helpers. */
static void bgnloop_emit(const struct lp_build_tgsi_action *action,
			 struct lp_build_tgsi_context *bld_base,
			 struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	ac_build_bgnloop(&ctx->ac, get_line(bld_base->pc));
}

static void brk_emit(const struct lp_build_tgsi_action *action,
		     struct lp_build_tgsi_context *bld_base,
		     struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	ac_build_break(&ctx->ac);
}

static void cont_emit(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	ac_build_continue(&ctx->ac);
}

static void else_emit(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	ac_build_else(&ctx->ac, get_line(bld_base->pc));
}

static void endif_emit(const struct lp_build_tgsi_action *action,
		       struct lp_build_tgsi_context *bld_base,
		       struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	ac_build_endif(&ctx->ac, get_line(bld_base->pc));
}

static void endloop_emit(const struct lp_build_tgsi_action *action,
			 struct lp_build_tgsi_context *bld_base,
			 struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	ac_build_endloop(&ctx->ac, get_line(bld_base->pc));
}

static void if_emit(const struct lp_build_tgsi_action *action,
		    struct lp_build_tgsi_context *bld_base,
		    struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	ac_build_if(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
}

static void uif_emit(const struct lp_build_tgsi_action *action,
		     struct lp_build_tgsi_context *bld_base,
		     struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	ac_build_uif(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
}

/* Record a TGSI immediate: append its four channels as i32 constants to
 * ctx->imms and advance the immediate counter. */
static void emit_immediate(struct lp_build_tgsi_context *bld_base,
			   const struct tgsi_full_immediate *imm)
{
	unsigned i;
	struct si_shader_context *ctx = si_shader_context(bld_base);

	for (i = 0; i < 4; ++i) {
		ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
			LLVMConstInt(ctx->i32, imm->u[i].Uint, false );
	}

	ctx->imms_num++;
}

/* Create the per-compilation LLVM context/module/builder, register the
 * TGSI emit callbacks, and cache the commonly used LLVM types/constants. */
void si_llvm_context_init(struct si_shader_context *ctx,
			  struct si_screen *sscreen,
			  LLVMTargetMachineRef tm)
{
	struct lp_type type;

	/* Initialize the gallivm object:
	 * We are only using the module, context, and builder fields of this struct.
	 * This should be enough for us to be able to pass our gallivm struct to the
	 * helper functions in the gallivm module.
	 */
	memset(ctx, 0, sizeof(*ctx));
	ctx->screen = sscreen;
	ctx->tm = tm;

	ctx->gallivm.context = LLVMContextCreate();
	ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
								ctx->gallivm.context);
	LLVMSetTarget(ctx->gallivm.module, "amdgcn--");

	/* Derive the module's data layout from the target machine. */
	LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
	char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
	LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
	LLVMDisposeTargetData(data_layout);
	LLVMDisposeMessage(data_layout_str);

	bool unsafe_fpmath = (sscreen->debug_flags & DBG(UNSAFE_MATH)) != 0;
	enum ac_float_mode float_mode =
		unsafe_fpmath ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
				AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;

	ctx->gallivm.builder = ac_create_builder(ctx->gallivm.context,
						 float_mode);

	ac_llvm_context_init(&ctx->ac, ctx->gallivm.context,
			     sscreen->info.chip_class, sscreen->info.family);
	ctx->ac.module = ctx->gallivm.module;
	ctx->ac.builder = ctx->gallivm.builder;

	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;

	/* 32-bit float base type; the *_bld contexts below are derived
	 * uint/int/double variants of it. */
	type.floating = true;
	type.fixed = false;
	type.sign = true;
	type.norm = false;
	type.width = 32;
	type.length = 1;

	lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
	lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
	lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
	type.width *= 2;
	lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
	lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
	lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));

	bld_base->soa = 1;
	bld_base->emit_swizzle = emit_swizzle;
	bld_base->emit_declaration = emit_declaration;
	bld_base->emit_immediate = emit_immediate;

	/* metadata allowing 2.5 ULP */
	ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->ac.context,
						       "fpmath", 6);
	LLVMValueRef arg = LLVMConstReal(ctx->ac.f32, 2.5);
	ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->ac.context,
						     &arg, 1);

	bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
	bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
	bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
	bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
	bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
	bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;

	si_shader_context_init_alu(&ctx->bld_base);
	si_shader_context_init_mem(ctx);

	ctx->voidt = LLVMVoidTypeInContext(ctx->ac.context);
	ctx->i1 = LLVMInt1TypeInContext(ctx->ac.context);
	ctx->i8 = LLVMInt8TypeInContext(ctx->ac.context);
	ctx->i32 = LLVMInt32TypeInContext(ctx->ac.context);
	ctx->i64 = LLVMInt64TypeInContext(ctx->ac.context);
	ctx->i128 = LLVMIntTypeInContext(ctx->ac.context, 128);
	ctx->f32 = LLVMFloatTypeInContext(ctx->ac.context);
	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);

	ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
	ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
}

/* Set the context to a certain TGSI shader. Can be called repeatedly
 * to change the shader. */
void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
			      struct si_shader *shader)
{
	const struct tgsi_shader_info *info = NULL;
	const struct tgsi_token *tokens = NULL;

	if (shader && shader->selector) {
		info = &shader->selector->info;
		tokens = shader->selector->tokens;
	}

	ctx->shader = shader;
	ctx->type = info ? info->processor : -1;
	ctx->bld_base.info = info;

	/* Clean up the old contents.
*/ 1111 FREE(ctx->temp_arrays); 1112 ctx->temp_arrays = NULL; 1113 FREE(ctx->temp_array_allocas); 1114 ctx->temp_array_allocas = NULL; 1115 1116 FREE(ctx->imms); 1117 ctx->imms = NULL; 1118 ctx->imms_num = 0; 1119 1120 FREE(ctx->temps); 1121 ctx->temps = NULL; 1122 ctx->temps_count = 0; 1123 1124 if (!info || !tokens) 1125 return; 1126 1127 if (info->array_max[TGSI_FILE_TEMPORARY] > 0) { 1128 int size = info->array_max[TGSI_FILE_TEMPORARY]; 1129 1130 ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0])); 1131 ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0])); 1132 1133 tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size, 1134 ctx->temp_arrays); 1135 } 1136 if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) { 1137 int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1; 1138 ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef)); 1139 } 1140 1141 /* Re-set these to start with a clean slate. */ 1142 ctx->bld_base.num_instructions = 0; 1143 ctx->bld_base.pc = 0; 1144 memset(ctx->outputs, 0, sizeof(ctx->outputs)); 1145 1146 ctx->bld_base.emit_store = si_llvm_emit_store; 1147 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch; 1148 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch; 1149 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch; 1150 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch; 1151 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value; 1152 1153 ctx->num_const_buffers = util_last_bit(info->const_buffers_declared); 1154 ctx->num_shader_buffers = util_last_bit(info->shader_buffers_declared); 1155 ctx->num_samplers = util_last_bit(info->samplers_declared); 1156 ctx->num_images = util_last_bit(info->images_declared); 1157 } 1158 1159 void si_llvm_create_func(struct si_shader_context *ctx, 1160 const char *name, 1161 LLVMTypeRef *return_types, unsigned num_return_elems, 1162 LLVMTypeRef *ParamTypes, unsigned ParamCount) 
1163 { 1164 LLVMTypeRef main_fn_type, ret_type; 1165 LLVMBasicBlockRef main_fn_body; 1166 enum si_llvm_calling_convention call_conv; 1167 unsigned real_shader_type; 1168 1169 if (num_return_elems) 1170 ret_type = LLVMStructTypeInContext(ctx->ac.context, 1171 return_types, 1172 num_return_elems, true); 1173 else 1174 ret_type = ctx->voidt; 1175 1176 /* Setup the function */ 1177 ctx->return_type = ret_type; 1178 main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0); 1179 ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type); 1180 main_fn_body = LLVMAppendBasicBlockInContext(ctx->ac.context, 1181 ctx->main_fn, "main_body"); 1182 LLVMPositionBuilderAtEnd(ctx->ac.builder, main_fn_body); 1183 1184 real_shader_type = ctx->type; 1185 1186 /* LS is merged into HS (TCS), and ES is merged into GS. */ 1187 if (ctx->screen->info.chip_class >= GFX9) { 1188 if (ctx->shader->key.as_ls) 1189 real_shader_type = PIPE_SHADER_TESS_CTRL; 1190 else if (ctx->shader->key.as_es) 1191 real_shader_type = PIPE_SHADER_GEOMETRY; 1192 } 1193 1194 switch (real_shader_type) { 1195 case PIPE_SHADER_VERTEX: 1196 case PIPE_SHADER_TESS_EVAL: 1197 call_conv = RADEON_LLVM_AMDGPU_VS; 1198 break; 1199 case PIPE_SHADER_TESS_CTRL: 1200 call_conv = HAVE_LLVM >= 0x0500 ? 
RADEON_LLVM_AMDGPU_HS : 1201 RADEON_LLVM_AMDGPU_VS; 1202 break; 1203 case PIPE_SHADER_GEOMETRY: 1204 call_conv = RADEON_LLVM_AMDGPU_GS; 1205 break; 1206 case PIPE_SHADER_FRAGMENT: 1207 call_conv = RADEON_LLVM_AMDGPU_PS; 1208 break; 1209 case PIPE_SHADER_COMPUTE: 1210 call_conv = RADEON_LLVM_AMDGPU_CS; 1211 break; 1212 default: 1213 unreachable("Unhandle shader type"); 1214 } 1215 1216 LLVMSetFunctionCallConv(ctx->main_fn, call_conv); 1217 } 1218 1219 void si_llvm_optimize_module(struct si_shader_context *ctx) 1220 { 1221 struct gallivm_state *gallivm = &ctx->gallivm; 1222 const char *triple = LLVMGetTarget(gallivm->module); 1223 LLVMTargetLibraryInfoRef target_library_info; 1224 1225 /* Dump LLVM IR before any optimization passes */ 1226 if (ctx->screen->debug_flags & DBG(PREOPT_IR) && 1227 si_can_dump_shader(ctx->screen, ctx->type)) 1228 LLVMDumpModule(ctx->gallivm.module); 1229 1230 /* Create the pass manager */ 1231 gallivm->passmgr = LLVMCreatePassManager(); 1232 1233 target_library_info = gallivm_create_target_library_info(triple); 1234 LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr); 1235 1236 if (si_extra_shader_checks(ctx->screen, ctx->type)) 1237 LLVMAddVerifierPass(gallivm->passmgr); 1238 1239 LLVMAddAlwaysInlinerPass(gallivm->passmgr); 1240 1241 /* This pass should eliminate all the load and store instructions */ 1242 LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr); 1243 1244 /* Add some optimization passes */ 1245 LLVMAddScalarReplAggregatesPass(gallivm->passmgr); 1246 LLVMAddLICMPass(gallivm->passmgr); 1247 LLVMAddAggressiveDCEPass(gallivm->passmgr); 1248 LLVMAddCFGSimplificationPass(gallivm->passmgr); 1249 #if HAVE_LLVM >= 0x0400 1250 /* This is recommended by the instruction combining pass. 
*/ 1251 LLVMAddEarlyCSEMemSSAPass(gallivm->passmgr); 1252 #endif 1253 LLVMAddInstructionCombiningPass(gallivm->passmgr); 1254 1255 /* Run the pass */ 1256 LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module); 1257 1258 LLVMDisposeBuilder(ctx->ac.builder); 1259 LLVMDisposePassManager(gallivm->passmgr); 1260 gallivm_dispose_target_library_info(target_library_info); 1261 } 1262 1263 void si_llvm_dispose(struct si_shader_context *ctx) 1264 { 1265 LLVMDisposeModule(ctx->gallivm.module); 1266 LLVMContextDispose(ctx->gallivm.context); 1267 FREE(ctx->temp_arrays); 1268 ctx->temp_arrays = NULL; 1269 FREE(ctx->temp_array_allocas); 1270 ctx->temp_array_allocas = NULL; 1271 FREE(ctx->temps); 1272 ctx->temps = NULL; 1273 ctx->temps_count = 0; 1274 FREE(ctx->imms); 1275 ctx->imms = NULL; 1276 ctx->imms_num = 0; 1277 ac_llvm_context_dispose(&ctx->ac); 1278 } 1279