//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides AMDGPU specific target streamer methods.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUTargetStreamer.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/FormattedStream.h"

// AMDGPUPTNote.h is deliberately included inside namespace llvm so that the
// declarations it provides (e.g. ElfNote::NoteName, ElfNote::SectionName,
// and the NT_AMDGPU_* note types used below) are wrapped in that namespace.
namespace llvm {
#include "AMDGPUPTNote.h"
}

using namespace llvm;
using namespace llvm::AMDGPU;

//===----------------------------------------------------------------------===//
// AMDGPUTargetStreamer
//===----------------------------------------------------------------------===//

// Maps a processor name to the corresponding EF_AMDGPU_MACH_* value stored in
// the ELF header e_flags.  Several marketing names alias the same gfx entry
// (e.g. "tahiti" == "gfx600").  The table is terminated by a sentinel entry
// with a null Name and EF_AMDGPU_MACH_NONE, which the lookup helpers below
// rely on as their "not found" result.
static const struct {
  const char *Name;
  unsigned Mach;
} MachTable[] = {
      // Radeon HD 2000/3000 Series (R600).
      { "r600", ELF::EF_AMDGPU_MACH_R600_R600 },
      { "r630", ELF::EF_AMDGPU_MACH_R600_R630 },
      { "rs880", ELF::EF_AMDGPU_MACH_R600_RS880 },
      { "rv670", ELF::EF_AMDGPU_MACH_R600_RV670 },
      // Radeon HD 4000 Series (R700).
      { "rv710", ELF::EF_AMDGPU_MACH_R600_RV710 },
      { "rv730", ELF::EF_AMDGPU_MACH_R600_RV730 },
      { "rv770", ELF::EF_AMDGPU_MACH_R600_RV770 },
      // Radeon HD 5000 Series (Evergreen).
      { "cedar", ELF::EF_AMDGPU_MACH_R600_CEDAR },
      { "cypress", ELF::EF_AMDGPU_MACH_R600_CYPRESS },
      { "juniper", ELF::EF_AMDGPU_MACH_R600_JUNIPER },
      { "redwood", ELF::EF_AMDGPU_MACH_R600_REDWOOD },
      { "sumo", ELF::EF_AMDGPU_MACH_R600_SUMO },
      // Radeon HD 6000 Series (Northern Islands).
      { "barts", ELF::EF_AMDGPU_MACH_R600_BARTS },
      { "caicos", ELF::EF_AMDGPU_MACH_R600_CAICOS },
      { "cayman", ELF::EF_AMDGPU_MACH_R600_CAYMAN },
      { "turks", ELF::EF_AMDGPU_MACH_R600_TURKS },
      // AMDGCN GFX6.
      { "gfx600", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
      { "tahiti", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
      { "gfx601", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
      { "hainan", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
      { "oland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
      { "pitcairn", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
      { "verde", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
      // AMDGCN GFX7.
      { "gfx700", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
      { "kaveri", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
      { "gfx701", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
      { "hawaii", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
      { "gfx702", ELF::EF_AMDGPU_MACH_AMDGCN_GFX702 },
      { "gfx703", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
      { "kabini", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
      { "mullins", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
      { "gfx704", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
      { "bonaire", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
      // AMDGCN GFX8.
      { "gfx801", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
      { "carrizo", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
      { "gfx802", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
      { "iceland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
      { "tonga", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
      { "gfx803", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
      { "fiji", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
      { "polaris10", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
      { "polaris11", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
      { "gfx810", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
      { "stoney", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
      // AMDGCN GFX9.
      { "gfx900", ELF::EF_AMDGPU_MACH_AMDGCN_GFX900 },
      { "gfx902", ELF::EF_AMDGPU_MACH_AMDGCN_GFX902 },
      { "gfx904", ELF::EF_AMDGPU_MACH_AMDGCN_GFX904 },
      { "gfx906", ELF::EF_AMDGPU_MACH_AMDGCN_GFX906 },
      // Not specified processor.
      { nullptr, ELF::EF_AMDGPU_MACH_NONE }
};

/// Linear-scan MachTable for \p GPU and return the matching EF_AMDGPU_MACH_*
/// value, or EF_AMDGPU_MACH_NONE (the sentinel entry) if the name is unknown.
unsigned AMDGPUTargetStreamer::getMACH(StringRef GPU) const {
  auto Entry = MachTable;
  for (; Entry->Name && GPU != Entry->Name; ++Entry)
    ;
  return Entry->Mach;
}

/// Inverse of getMACH: linear-scan MachTable for \p Mach and return the first
/// matching processor name, or nullptr (the sentinel entry) if unknown.  For
/// values with multiple aliases this returns the first (gfx*) spelling.
const char *AMDGPUTargetStreamer::getMachName(unsigned Mach) {
  auto Entry = MachTable;
  for (; Entry->Name && Mach != Entry->Mach; ++Entry)
    ;
  return Entry->Name;
}

/// Parse \p HSAMetadataString into an HSAMD::Metadata object and forward it to
/// the structured EmitHSAMetadata overload.  Returns false if parsing fails.
bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
  HSAMD::Metadata HSAMetadata;
  if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
    return false;

  return EmitHSAMetadata(HSAMetadata);
}

//===----------------------------------------------------------------------===//
// AMDGPUTargetAsmStreamer
//===----------------------------------------------------------------------===//

AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
                                                 formatted_raw_ostream &OS)
    : AMDGPUTargetStreamer(S), OS(OS) { }

/// Print the .amdgcn_target assembler directive.
void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {
  OS << "\t.amdgcn_target \"" << Target << "\"\n";
}

/// Print the .hsa_code_object_version assembler directive.
void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(
    uint32_t Major, uint32_t Minor) {
  OS << "\t.hsa_code_object_version " <<
        Twine(Major) << "," << Twine(Minor) << '\n';
}

/// Print the .hsa_code_object_isa assembler directive.
void
AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
                                                       uint32_t Minor,
                                                       uint32_t Stepping,
                                                       StringRef VendorName,
                                                       StringRef ArchName) {
  OS << "\t.hsa_code_object_isa " <<
        Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
        ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
}

/// Print the kernel code header as a field-by-field textual
/// .amd_kernel_code_t / .end_amd_kernel_code_t directive pair.
void
AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
  OS << "\t.amd_kernel_code_t\n";
  dumpAmdKernelCode(&Header, OS, "\t\t");
  OS << "\t.end_amd_kernel_code_t\n";
}

/// Print the directive that marks a symbol's AMDGPU-specific ELF type.  Only
/// STT_AMDGPU_HSA_KERNEL is supported; any other type is a programming error.
void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
                                                   unsigned Type) {
  switch (Type) {
    default: llvm_unreachable("Invalid AMDGPU symbol type");
    case ELF::STT_AMDGPU_HSA_KERNEL:
      OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
      break;
  }
}

/// Print the .amd_amdgpu_isa directive.  Always succeeds in the asm streamer.
bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) {
  OS << "\t.amd_amdgpu_isa \"" << IsaVersionString << "\"\n";
  return true;
}

/// Serialize \p HSAMetadata back to its string form and print it between the
/// HSA metadata begin/end assembler directives.  Returns false if the
/// metadata cannot be serialized.
bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
    const AMDGPU::HSAMD::Metadata &HSAMetadata) {
  std::string HSAMetadataString;
  if (HSAMD::toString(HSAMetadata, HSAMetadataString))
    return false;

  OS << '\t' << HSAMD::AssemblerDirectiveBegin << '\n';
  OS << HSAMetadataString << '\n';
  OS << '\t' << HSAMD::AssemblerDirectiveEnd << '\n';
  return true;
}

/// Serialize \p PALMetadata and print it after the PAL metadata assembler
/// directive.  Returns false if serialization fails.
bool AMDGPUTargetAsmStreamer::EmitPALMetadata(
    const PALMD::Metadata &PALMetadata) {
  std::string PALMetadataString;
  if (PALMD::toString(PALMetadata, PALMetadataString))
    return false;

  OS << '\t' << PALMD::AssemblerDirective << PALMetadataString << '\n';
  return true;
}

/// Print a .amdhsa_kernel/.end_amdhsa_kernel directive block describing \p KD.
/// To keep the output minimal, each sub-directive is emitted only when the
/// corresponding field differs from the default kernel descriptor; the
/// .amdhsa_next_free_vgpr/sgpr directives are always required and always
/// printed.  Some directives are gated on the ISA major version.
void
AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
    bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
  amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor();

  IsaInfo::IsaVersion IVersion = IsaInfo::getIsaVersion(STI.getFeatureBits());

  OS << "\t.amdhsa_kernel " << KernelName << '\n';

// Emits "DIRECTIVE <value>" only when the bitfield FIELD_NAME of
// KERNEL_DESC.MEMBER_NAME differs from the same field in DEFAULT_KERNEL_DESC.
// Note: expands to a single `if` statement, so it may safely be nested under
// an outer unbraced `if` (see the IVersion.Major >= 9 use below).
#define PRINT_IF_NOT_DEFAULT(STREAM, DIRECTIVE, KERNEL_DESC,                   \
                             DEFAULT_KERNEL_DESC, MEMBER_NAME, FIELD_NAME)     \
  if (AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) !=                  \
      AMDHSA_BITS_GET(DEFAULT_KERNEL_DESC.MEMBER_NAME, FIELD_NAME))            \
    STREAM << "\t\t" << DIRECTIVE << " "                                       \
           << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';

  // These two are plain integer fields, not bitfields, so they are compared
  // directly rather than through the macro.
  if (KD.group_segment_fixed_size != DefaultKD.group_segment_fixed_size)
    OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
       << '\n';
  if (KD.private_segment_fixed_size != DefaultKD.private_segment_fixed_size)
    OS << "\t\t.amdhsa_private_segment_fixed_size "
       << KD.private_segment_fixed_size << '\n';

  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_user_sgpr_private_segment_buffer", KD, DefaultKD,
      kernel_code_properties,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD, DefaultKD,
                       kernel_code_properties,
                       amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_queue_ptr", KD, DefaultKD,
                       kernel_code_properties,
                       amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD, DefaultKD,
      kernel_code_properties,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_dispatch_id", KD, DefaultKD,
                       kernel_code_properties,
                       amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_user_sgpr_flat_scratch_init", KD, DefaultKD,
      kernel_code_properties,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_user_sgpr_private_segment_size", KD, DefaultKD,
      kernel_code_properties,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD, DefaultKD,
      compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD, DefaultKD,
                       compute_pgm_rsrc2,
                       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD, DefaultKD,
                       compute_pgm_rsrc2,
                       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD, DefaultKD,
                       compute_pgm_rsrc2,
                       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_info", KD, DefaultKD,
                       compute_pgm_rsrc2,
                       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_vgpr_workitem_id", KD, DefaultKD,
                       compute_pgm_rsrc2,
                       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  // These directives are required.
  OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
  OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n';

  // VCC is reserved by default, so only the non-default (false) value needs a
  // directive; similarly for flat scratch and XNACK below, which additionally
  // only exist from GFX7 / GFX8 onwards respectively.
  if (!ReserveVCC)
    OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
  if (IVersion.Major >= 7 && !ReserveFlatScr)
    OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n';
  if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI))
    OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n';

  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_round_mode_32", KD, DefaultKD,
                       compute_pgm_rsrc1,
                       amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_round_mode_16_64", KD, DefaultKD,
                       compute_pgm_rsrc1,
                       amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_denorm_mode_32", KD, DefaultKD,
                       compute_pgm_rsrc1,
                       amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_denorm_mode_16_64", KD, DefaultKD,
                       compute_pgm_rsrc1,
                       amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_dx10_clamp", KD, DefaultKD,
                       compute_pgm_rsrc1,
                       amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
  PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_ieee_mode", KD, DefaultKD,
                       compute_pgm_rsrc1,
                       amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
  // FP16 overflow control only exists on GFX9+.
  if (IVersion.Major >= 9)
    PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_fp16_overflow", KD, DefaultKD,
                         compute_pgm_rsrc1,
                         amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_exception_fp_ieee_invalid_op", KD, DefaultKD,
      compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_exception_fp_denorm_src", KD, DefaultKD, compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_exception_fp_ieee_div_zero", KD, DefaultKD,
      compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_exception_fp_ieee_overflow", KD, DefaultKD,
      compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_exception_fp_ieee_underflow", KD, DefaultKD,
      compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_exception_fp_ieee_inexact", KD, DefaultKD, compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_IF_NOT_DEFAULT(
      OS, ".amdhsa_exception_int_div_zero", KD, DefaultKD, compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
#undef PRINT_IF_NOT_DEFAULT

  OS << "\t.end_amdhsa_kernel\n";
}

//===----------------------------------------------------------------------===//
// AMDGPUTargetELFStreamer
//===----------------------------------------------------------------------===//

// The constructor records the target processor (EF_AMDGPU_MACH bits) and the
// XNACK capability in the ELF header e_flags, preserving any other flag bits
// already set on the assembler.
AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(
    MCStreamer &S, const MCSubtargetInfo &STI)
    : AMDGPUTargetStreamer(S), Streamer(S) {
  MCAssembler &MCA = getStreamer().getAssembler();
  unsigned EFlags = MCA.getELFHeaderEFlags();

  EFlags &= ~ELF::EF_AMDGPU_MACH;
  EFlags |= getMACH(STI.getCPU());

  EFlags &= ~ELF::EF_AMDGPU_XNACK;
  if (AMDGPU::hasXNACK(STI))
    EFlags |= ELF::EF_AMDGPU_XNACK;

  MCA.setELFHeaderEFlags(EFlags);
}

MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
  return static_cast<MCELFStreamer &>(Streamer);
}

/// Emit one ELF note record of type \p NoteType into the AMDGPU note section.
/// The note header (namesz/descsz/type), name, and 4-byte alignment padding
/// are emitted here; \p EmitDesc is called back to emit the desc payload,
/// whose size \p DescSZ may be a label-difference expression resolved later.
/// The current section is saved and restored around the emission.
void AMDGPUTargetELFStreamer::EmitAMDGPUNote(
    const MCExpr *DescSZ, unsigned NoteType,
    function_ref<void(MCELFStreamer &)> EmitDesc) {
  auto &S = getStreamer();
  auto &Context = S.getContext();

  auto NameSZ = sizeof(ElfNote::NoteName);

  S.PushSection();
  S.SwitchSection(Context.getELFSection(
      ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
  S.EmitIntValue(NameSZ, 4);                                  // namesz
  S.EmitValue(DescSZ, 4);                                     // descsz
  S.EmitIntValue(NoteType, 4);                                // type
  S.EmitBytes(StringRef(ElfNote::NoteName, NameSZ));          // name
  S.EmitValueToAlignment(4, 0, 1, 0);                         // padding 0
  EmitDesc(S);                                                // desc
  S.EmitValueToAlignment(4, 0, 1, 0);                         // padding 0
  S.PopSection();
}

// The amdgcn target is implied by the ELF header flags; nothing to emit.
void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {}

/// Emit the HSA code object version as a note whose desc is two 32-bit words
/// (hence the constant descsz of 8).
void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
    uint32_t Major, uint32_t Minor) {

  EmitAMDGPUNote(
    MCConstantExpr::create(8, getContext()),
    ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION,
    [&](MCELFStreamer &OS){
      OS.EmitIntValue(Major, 4);
      OS.EmitIntValue(Minor, 4);
    }
  );
}

/// Emit the HSA ISA description note.  The desc layout is: two 16-bit string
/// sizes, three 32-bit version numbers, then the two NUL-terminated strings
/// (the +1 on each size accounts for the NUL terminator).
void
AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
                                                       uint32_t Minor,
                                                       uint32_t Stepping,
                                                       StringRef VendorName,
                                                       StringRef ArchName) {
  uint16_t VendorNameSize = VendorName.size() + 1;
  uint16_t ArchNameSize = ArchName.size() + 1;

  unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
    sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
    VendorNameSize + ArchNameSize;

  EmitAMDGPUNote(
    MCConstantExpr::create(DescSZ, getContext()),
    ElfNote::NT_AMDGPU_HSA_ISA,
    [&](MCELFStreamer &OS) {
      OS.EmitIntValue(VendorNameSize, 2);
      OS.EmitIntValue(ArchNameSize, 2);
      OS.EmitIntValue(Major, 4);
      OS.EmitIntValue(Minor, 4);
      OS.EmitIntValue(Stepping, 4);
      OS.EmitBytes(VendorName);
      OS.EmitIntValue(0, 1); // NULL terminate VendorName
      OS.EmitBytes(ArchName);
      OS.EmitIntValue(0, 1); // NULL terminate ArchName
    }
  );
}

/// Emit the kernel code header as raw bytes into the current section.
void
AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {

  MCStreamer &OS = getStreamer();
  OS.PushSection();
  OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
  OS.PopSection();
}

/// Set the ELF symbol type (e.g. STT_AMDGPU_HSA_KERNEL) on \p SymbolName,
/// creating the symbol if it does not exist yet.
void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
                                                   unsigned Type) {
  MCSymbolELF *Symbol = cast<MCSymbolELF>(
      getStreamer().getContext().getOrCreateSymbol(SymbolName));
  Symbol->setType(Type);
}

/// Emit the ISA version string as an NT_AMD_AMDGPU_ISA note.  The desc size
/// is not known until layout, so it is expressed as the difference of two
/// temporary labels bracketing the string.
bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
  // Create two labels to mark the beginning and end of the desc field
  // and a MCExpr to calculate the size of the desc field.
  auto &Context = getContext();
  auto *DescBegin = Context.createTempSymbol();
  auto *DescEnd = Context.createTempSymbol();
  auto *DescSZ = MCBinaryExpr::createSub(
    MCSymbolRefExpr::create(DescEnd, Context),
    MCSymbolRefExpr::create(DescBegin, Context), Context);

  EmitAMDGPUNote(
    DescSZ,
    ELF::NT_AMD_AMDGPU_ISA,
    [&](MCELFStreamer &OS) {
      OS.EmitLabel(DescBegin);
      OS.EmitBytes(IsaVersionString);
      OS.EmitLabel(DescEnd);
    }
  );
  return true;
}

/// Serialize \p HSAMetadata and emit it as an NT_AMD_AMDGPU_HSA_METADATA note,
/// using the same label-difference trick as EmitISAVersion for the desc size.
/// Returns false if serialization fails.
bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
    const AMDGPU::HSAMD::Metadata &HSAMetadata) {
  std::string HSAMetadataString;
  if (HSAMD::toString(HSAMetadata, HSAMetadataString))
    return false;

  // Create two labels to mark the beginning and end of the desc field
  // and a MCExpr to calculate the size of the desc field.
  auto &Context = getContext();
  auto *DescBegin = Context.createTempSymbol();
  auto *DescEnd = Context.createTempSymbol();
  auto *DescSZ = MCBinaryExpr::createSub(
    MCSymbolRefExpr::create(DescEnd, Context),
    MCSymbolRefExpr::create(DescBegin, Context), Context);

  EmitAMDGPUNote(
    DescSZ,
    ELF::NT_AMD_AMDGPU_HSA_METADATA,
    [&](MCELFStreamer &OS) {
      OS.EmitLabel(DescBegin);
      OS.EmitBytes(HSAMetadataString);
      OS.EmitLabel(DescEnd);
    }
  );
  return true;
}

/// Emit \p PALMetadata as an NT_AMD_AMDGPU_PAL_METADATA note whose desc is a
/// sequence of 32-bit words; the desc size is therefore known up front.
bool AMDGPUTargetELFStreamer::EmitPALMetadata(
    const PALMD::Metadata &PALMetadata) {
  EmitAMDGPUNote(
    MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t), getContext()),
    ELF::NT_AMD_AMDGPU_PAL_METADATA,
    [&](MCELFStreamer &OS){
      for (auto I : PALMetadata)
        OS.EmitIntValue(I, sizeof(uint32_t));
    }
  );
  return true;
}

/// Emit the amdhsa kernel descriptor object for \p KernelName.  Defines a
/// global STT_OBJECT symbol "<KernelName>.kd" at the descriptor, then emits
/// the descriptor bytes in three pieces: the bytes before
/// kernel_code_entry_byte_offset, that field itself as a relocatable
/// label-difference expression (code start minus descriptor start), and the
/// remaining bytes after the field.
void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
    uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
    bool ReserveXNACK) {
  auto &Streamer = getStreamer();
  auto &Context = Streamer.getContext();

  MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
      Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));
  KernelDescriptorSymbol->setBinding(ELF::STB_GLOBAL);
  KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
  KernelDescriptorSymbol->setSize(
      MCConstantExpr::create(sizeof(KernelDescriptor), Context));

  MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
      Context.getOrCreateSymbol(Twine(KernelName)));
  KernelCodeSymbol->setBinding(ELF::STB_LOCAL);

  Streamer.EmitLabel(KernelDescriptorSymbol);
  Streamer.EmitBytes(StringRef(
      (const char*)&(KernelDescriptor),
      offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)));
  // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
  // expression being created is:
  //   (start of kernel code) - (start of kernel descriptor)
  // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
  Streamer.EmitValue(MCBinaryExpr::createSub(
      MCSymbolRefExpr::create(
          KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
      MCSymbolRefExpr::create(
          KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context),
      Context),
      sizeof(KernelDescriptor.kernel_code_entry_byte_offset));
  Streamer.EmitBytes(StringRef(
      (const char*)&(KernelDescriptor) +
          offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) +
          sizeof(KernelDescriptor.kernel_code_entry_byte_offset),
      sizeof(KernelDescriptor) -
          offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) -
          sizeof(KernelDescriptor.kernel_code_entry_byte_offset)));
}