Home | History | Annotate | Download | only in MCTargetDesc
      1 //===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file provides AMDGPU specific target streamer methods.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "AMDGPUTargetStreamer.h"
     15 #include "AMDGPU.h"
     16 #include "SIDefines.h"
     17 #include "Utils/AMDGPUBaseInfo.h"
     18 #include "Utils/AMDKernelCodeTUtils.h"
     19 #include "llvm/ADT/Twine.h"
     20 #include "llvm/BinaryFormat/ELF.h"
     21 #include "llvm/IR/Constants.h"
     22 #include "llvm/IR/Function.h"
     23 #include "llvm/IR/Metadata.h"
     24 #include "llvm/IR/Module.h"
     25 #include "llvm/MC/MCContext.h"
     26 #include "llvm/MC/MCELFStreamer.h"
     27 #include "llvm/MC/MCObjectFileInfo.h"
     28 #include "llvm/MC/MCSectionELF.h"
     29 #include "llvm/Support/FormattedStream.h"
     30 
     31 namespace llvm {
     32 #include "AMDGPUPTNote.h"
     33 }
     34 
     35 using namespace llvm;
     36 using namespace llvm::AMDGPU;
     37 
     38 //===----------------------------------------------------------------------===//
     39 // AMDGPUTargetStreamer
     40 //===----------------------------------------------------------------------===//
     41 
     42 static const struct {
     43   const char *Name;
     44   unsigned Mach;
     45 } MachTable[] = {
     46       // Radeon HD 2000/3000 Series (R600).
     47       { "r600", ELF::EF_AMDGPU_MACH_R600_R600 },
     48       { "r630", ELF::EF_AMDGPU_MACH_R600_R630 },
     49       { "rs880", ELF::EF_AMDGPU_MACH_R600_RS880 },
     50       { "rv670", ELF::EF_AMDGPU_MACH_R600_RV670 },
     51       // Radeon HD 4000 Series (R700).
     52       { "rv710", ELF::EF_AMDGPU_MACH_R600_RV710 },
     53       { "rv730", ELF::EF_AMDGPU_MACH_R600_RV730 },
     54       { "rv770", ELF::EF_AMDGPU_MACH_R600_RV770 },
     55       // Radeon HD 5000 Series (Evergreen).
     56       { "cedar", ELF::EF_AMDGPU_MACH_R600_CEDAR },
     57       { "cypress", ELF::EF_AMDGPU_MACH_R600_CYPRESS },
     58       { "juniper", ELF::EF_AMDGPU_MACH_R600_JUNIPER },
     59       { "redwood", ELF::EF_AMDGPU_MACH_R600_REDWOOD },
     60       { "sumo", ELF::EF_AMDGPU_MACH_R600_SUMO },
     61       // Radeon HD 6000 Series (Northern Islands).
     62       { "barts", ELF::EF_AMDGPU_MACH_R600_BARTS },
     63       { "caicos", ELF::EF_AMDGPU_MACH_R600_CAICOS },
     64       { "cayman", ELF::EF_AMDGPU_MACH_R600_CAYMAN },
     65       { "turks", ELF::EF_AMDGPU_MACH_R600_TURKS },
     66       // AMDGCN GFX6.
     67       { "gfx600", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
     68       { "tahiti", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
     69       { "gfx601", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
     70       { "hainan", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
     71       { "oland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
     72       { "pitcairn", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
     73       { "verde", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
     74       // AMDGCN GFX7.
     75       { "gfx700", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
     76       { "kaveri", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
     77       { "gfx701", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
     78       { "hawaii", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
     79       { "gfx702", ELF::EF_AMDGPU_MACH_AMDGCN_GFX702 },
     80       { "gfx703", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
     81       { "kabini", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
     82       { "mullins", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
     83       { "gfx704", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
     84       { "bonaire", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
     85       // AMDGCN GFX8.
     86       { "gfx801", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
     87       { "carrizo", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
     88       { "gfx802", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
     89       { "iceland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
     90       { "tonga", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
     91       { "gfx803", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
     92       { "fiji", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
     93       { "polaris10", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
     94       { "polaris11", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
     95       { "gfx810", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
     96       { "stoney", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
     97       // AMDGCN GFX9.
     98       { "gfx900", ELF::EF_AMDGPU_MACH_AMDGCN_GFX900 },
     99       { "gfx902", ELF::EF_AMDGPU_MACH_AMDGCN_GFX902 },
    100       { "gfx904", ELF::EF_AMDGPU_MACH_AMDGCN_GFX904 },
    101       { "gfx906", ELF::EF_AMDGPU_MACH_AMDGCN_GFX906 },
    102       // Not specified processor.
    103       { nullptr, ELF::EF_AMDGPU_MACH_NONE }
    104 };
    105 
    106 unsigned AMDGPUTargetStreamer::getMACH(StringRef GPU) const {
    107   auto Entry = MachTable;
    108   for (; Entry->Name && GPU != Entry->Name; ++Entry)
    109     ;
    110   return Entry->Mach;
    111 }
    112 
    113 const char *AMDGPUTargetStreamer::getMachName(unsigned Mach) {
    114   auto Entry = MachTable;
    115   for (; Entry->Name && Mach != Entry->Mach; ++Entry)
    116     ;
    117   return Entry->Name;
    118 }
    119 
    120 bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
    121   HSAMD::Metadata HSAMetadata;
    122   if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
    123     return false;
    124 
    125   return EmitHSAMetadata(HSAMetadata);
    126 }
    127 
    128 //===----------------------------------------------------------------------===//
    129 // AMDGPUTargetAsmStreamer
    130 //===----------------------------------------------------------------------===//
    131 
    132 AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
    133                                                  formatted_raw_ostream &OS)
    134     : AMDGPUTargetStreamer(S), OS(OS) { }
    135 
    136 void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {
    137   OS << "\t.amdgcn_target \"" << Target << "\"\n";
    138 }
    139 
    140 void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(
    141     uint32_t Major, uint32_t Minor) {
    142   OS << "\t.hsa_code_object_version " <<
    143         Twine(Major) << "," << Twine(Minor) << '\n';
    144 }
    145 
    146 void
    147 AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
    148                                                        uint32_t Minor,
    149                                                        uint32_t Stepping,
    150                                                        StringRef VendorName,
    151                                                        StringRef ArchName) {
    152   OS << "\t.hsa_code_object_isa " <<
    153         Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
    154         ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
    155 
    156 }
    157 
    158 void
    159 AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
    160   OS << "\t.amd_kernel_code_t\n";
    161   dumpAmdKernelCode(&Header, OS, "\t\t");
    162   OS << "\t.end_amd_kernel_code_t\n";
    163 }
    164 
    165 void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
    166                                                    unsigned Type) {
    167   switch (Type) {
    168     default: llvm_unreachable("Invalid AMDGPU symbol type");
    169     case ELF::STT_AMDGPU_HSA_KERNEL:
    170       OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
    171       break;
    172   }
    173 }
    174 
    175 bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) {
    176   OS << "\t.amd_amdgpu_isa \"" << IsaVersionString << "\"\n";
    177   return true;
    178 }
    179 
    180 bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
    181     const AMDGPU::HSAMD::Metadata &HSAMetadata) {
    182   std::string HSAMetadataString;
    183   if (HSAMD::toString(HSAMetadata, HSAMetadataString))
    184     return false;
    185 
    186   OS << '\t' << HSAMD::AssemblerDirectiveBegin << '\n';
    187   OS << HSAMetadataString << '\n';
    188   OS << '\t' << HSAMD::AssemblerDirectiveEnd << '\n';
    189   return true;
    190 }
    191 
    192 bool AMDGPUTargetAsmStreamer::EmitPALMetadata(
    193     const PALMD::Metadata &PALMetadata) {
    194   std::string PALMetadataString;
    195   if (PALMD::toString(PALMetadata, PALMetadataString))
    196     return false;
    197 
    198   OS << '\t' << PALMD::AssemblerDirective << PALMetadataString << '\n';
    199   return true;
    200 }
    201 
    202 void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
    203     const MCSubtargetInfo &STI, StringRef KernelName,
    204     const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
    205     bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
    206   amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor();
    207 
    208   IsaInfo::IsaVersion IVersion = IsaInfo::getIsaVersion(STI.getFeatureBits());
    209 
    210   OS << "\t.amdhsa_kernel " << KernelName << '\n';
    211 
    212 #define PRINT_IF_NOT_DEFAULT(STREAM, DIRECTIVE, KERNEL_DESC,                   \
    213                              DEFAULT_KERNEL_DESC, MEMBER_NAME, FIELD_NAME)     \
    214   if (AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) !=                  \
    215       AMDHSA_BITS_GET(DEFAULT_KERNEL_DESC.MEMBER_NAME, FIELD_NAME))            \
    216     STREAM << "\t\t" << DIRECTIVE << " "                                       \
    217            << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';
    218 
    219   if (KD.group_segment_fixed_size != DefaultKD.group_segment_fixed_size)
    220     OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
    221        << '\n';
    222   if (KD.private_segment_fixed_size != DefaultKD.private_segment_fixed_size)
    223     OS << "\t\t.amdhsa_private_segment_fixed_size "
    224        << KD.private_segment_fixed_size << '\n';
    225 
    226   PRINT_IF_NOT_DEFAULT(
    227       OS, ".amdhsa_user_sgpr_private_segment_buffer", KD, DefaultKD,
    228       kernel_code_properties,
    229       amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
    230   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD, DefaultKD,
    231                        kernel_code_properties,
    232                        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
    233   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_queue_ptr", KD, DefaultKD,
    234                        kernel_code_properties,
    235                        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
    236   PRINT_IF_NOT_DEFAULT(
    237       OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD, DefaultKD,
    238       kernel_code_properties,
    239       amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
    240   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_dispatch_id", KD, DefaultKD,
    241                        kernel_code_properties,
    242                        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
    243   PRINT_IF_NOT_DEFAULT(
    244       OS, ".amdhsa_user_sgpr_flat_scratch_init", KD, DefaultKD,
    245       kernel_code_properties,
    246       amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
    247   PRINT_IF_NOT_DEFAULT(
    248       OS, ".amdhsa_user_sgpr_private_segment_size", KD, DefaultKD,
    249       kernel_code_properties,
    250       amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
    251   PRINT_IF_NOT_DEFAULT(
    252       OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD, DefaultKD,
    253       compute_pgm_rsrc2,
    254       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET);
    255   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD, DefaultKD,
    256                        compute_pgm_rsrc2,
    257                        amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
    258   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD, DefaultKD,
    259                        compute_pgm_rsrc2,
    260                        amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
    261   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD, DefaultKD,
    262                        compute_pgm_rsrc2,
    263                        amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
    264   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_info", KD, DefaultKD,
    265                        compute_pgm_rsrc2,
    266                        amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
    267   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_vgpr_workitem_id", KD, DefaultKD,
    268                        compute_pgm_rsrc2,
    269                        amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
    270 
    271   // These directives are required.
    272   OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
    273   OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n';
    274 
    275   if (!ReserveVCC)
    276     OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
    277   if (IVersion.Major >= 7 && !ReserveFlatScr)
    278     OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n';
    279   if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI))
    280     OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n';
    281 
    282   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_round_mode_32", KD, DefaultKD,
    283                        compute_pgm_rsrc1,
    284                        amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
    285   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_round_mode_16_64", KD, DefaultKD,
    286                        compute_pgm_rsrc1,
    287                        amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
    288   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_denorm_mode_32", KD, DefaultKD,
    289                        compute_pgm_rsrc1,
    290                        amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
    291   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_denorm_mode_16_64", KD, DefaultKD,
    292                        compute_pgm_rsrc1,
    293                        amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
    294   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_dx10_clamp", KD, DefaultKD,
    295                        compute_pgm_rsrc1,
    296                        amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
    297   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_ieee_mode", KD, DefaultKD,
    298                        compute_pgm_rsrc1,
    299                        amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
    300   if (IVersion.Major >= 9)
    301     PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_fp16_overflow", KD, DefaultKD,
    302                          compute_pgm_rsrc1,
    303                          amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
    304   PRINT_IF_NOT_DEFAULT(
    305       OS, ".amdhsa_exception_fp_ieee_invalid_op", KD, DefaultKD,
    306       compute_pgm_rsrc2,
    307       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
    308   PRINT_IF_NOT_DEFAULT(
    309       OS, ".amdhsa_exception_fp_denorm_src", KD, DefaultKD, compute_pgm_rsrc2,
    310       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
    311   PRINT_IF_NOT_DEFAULT(
    312       OS, ".amdhsa_exception_fp_ieee_div_zero", KD, DefaultKD,
    313       compute_pgm_rsrc2,
    314       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
    315   PRINT_IF_NOT_DEFAULT(
    316       OS, ".amdhsa_exception_fp_ieee_overflow", KD, DefaultKD,
    317       compute_pgm_rsrc2,
    318       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
    319   PRINT_IF_NOT_DEFAULT(
    320       OS, ".amdhsa_exception_fp_ieee_underflow", KD, DefaultKD,
    321       compute_pgm_rsrc2,
    322       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
    323   PRINT_IF_NOT_DEFAULT(
    324       OS, ".amdhsa_exception_fp_ieee_inexact", KD, DefaultKD, compute_pgm_rsrc2,
    325       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
    326   PRINT_IF_NOT_DEFAULT(
    327       OS, ".amdhsa_exception_int_div_zero", KD, DefaultKD, compute_pgm_rsrc2,
    328       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
    329 #undef PRINT_IF_NOT_DEFAULT
    330 
    331   OS << "\t.end_amdhsa_kernel\n";
    332 }
    333 
    334 //===----------------------------------------------------------------------===//
    335 // AMDGPUTargetELFStreamer
    336 //===----------------------------------------------------------------------===//
    337 
    338 AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(
    339     MCStreamer &S, const MCSubtargetInfo &STI)
    340     : AMDGPUTargetStreamer(S), Streamer(S) {
    341   MCAssembler &MCA = getStreamer().getAssembler();
    342   unsigned EFlags = MCA.getELFHeaderEFlags();
    343 
    344   EFlags &= ~ELF::EF_AMDGPU_MACH;
    345   EFlags |= getMACH(STI.getCPU());
    346 
    347   EFlags &= ~ELF::EF_AMDGPU_XNACK;
    348   if (AMDGPU::hasXNACK(STI))
    349     EFlags |= ELF::EF_AMDGPU_XNACK;
    350 
    351   MCA.setELFHeaderEFlags(EFlags);
    352 }
    353 
    354 MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
    355   return static_cast<MCELFStreamer &>(Streamer);
    356 }
    357 
    358 void AMDGPUTargetELFStreamer::EmitAMDGPUNote(
    359     const MCExpr *DescSZ, unsigned NoteType,
    360     function_ref<void(MCELFStreamer &)> EmitDesc) {
    361   auto &S = getStreamer();
    362   auto &Context = S.getContext();
    363 
    364   auto NameSZ = sizeof(ElfNote::NoteName);
    365 
    366   S.PushSection();
    367   S.SwitchSection(Context.getELFSection(
    368     ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
    369   S.EmitIntValue(NameSZ, 4);                                  // namesz
    370   S.EmitValue(DescSZ, 4);                                     // descz
    371   S.EmitIntValue(NoteType, 4);                                // type
    372   S.EmitBytes(StringRef(ElfNote::NoteName, NameSZ));          // name
    373   S.EmitValueToAlignment(4, 0, 1, 0);                         // padding 0
    374   EmitDesc(S);                                                // desc
    375   S.EmitValueToAlignment(4, 0, 1, 0);                         // padding 0
    376   S.PopSection();
    377 }
    378 
    379 void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {}
    380 
    381 void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
    382     uint32_t Major, uint32_t Minor) {
    383 
    384   EmitAMDGPUNote(
    385     MCConstantExpr::create(8, getContext()),
    386     ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION,
    387     [&](MCELFStreamer &OS){
    388       OS.EmitIntValue(Major, 4);
    389       OS.EmitIntValue(Minor, 4);
    390     }
    391   );
    392 }
    393 
    394 void
    395 AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
    396                                                        uint32_t Minor,
    397                                                        uint32_t Stepping,
    398                                                        StringRef VendorName,
    399                                                        StringRef ArchName) {
    400   uint16_t VendorNameSize = VendorName.size() + 1;
    401   uint16_t ArchNameSize = ArchName.size() + 1;
    402 
    403   unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
    404     sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
    405     VendorNameSize + ArchNameSize;
    406 
    407   EmitAMDGPUNote(
    408     MCConstantExpr::create(DescSZ, getContext()),
    409     ElfNote::NT_AMDGPU_HSA_ISA,
    410     [&](MCELFStreamer &OS) {
    411       OS.EmitIntValue(VendorNameSize, 2);
    412       OS.EmitIntValue(ArchNameSize, 2);
    413       OS.EmitIntValue(Major, 4);
    414       OS.EmitIntValue(Minor, 4);
    415       OS.EmitIntValue(Stepping, 4);
    416       OS.EmitBytes(VendorName);
    417       OS.EmitIntValue(0, 1); // NULL terminate VendorName
    418       OS.EmitBytes(ArchName);
    419       OS.EmitIntValue(0, 1); // NULL terminte ArchName
    420     }
    421   );
    422 }
    423 
    424 void
    425 AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
    426 
    427   MCStreamer &OS = getStreamer();
    428   OS.PushSection();
    429   OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
    430   OS.PopSection();
    431 }
    432 
    433 void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
    434                                                    unsigned Type) {
    435   MCSymbolELF *Symbol = cast<MCSymbolELF>(
    436       getStreamer().getContext().getOrCreateSymbol(SymbolName));
    437   Symbol->setType(Type);
    438 }
    439 
    440 bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
    441   // Create two labels to mark the beginning and end of the desc field
    442   // and a MCExpr to calculate the size of the desc field.
    443   auto &Context = getContext();
    444   auto *DescBegin = Context.createTempSymbol();
    445   auto *DescEnd = Context.createTempSymbol();
    446   auto *DescSZ = MCBinaryExpr::createSub(
    447     MCSymbolRefExpr::create(DescEnd, Context),
    448     MCSymbolRefExpr::create(DescBegin, Context), Context);
    449 
    450   EmitAMDGPUNote(
    451     DescSZ,
    452     ELF::NT_AMD_AMDGPU_ISA,
    453     [&](MCELFStreamer &OS) {
    454       OS.EmitLabel(DescBegin);
    455       OS.EmitBytes(IsaVersionString);
    456       OS.EmitLabel(DescEnd);
    457     }
    458   );
    459   return true;
    460 }
    461 
    462 bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
    463     const AMDGPU::HSAMD::Metadata &HSAMetadata) {
    464   std::string HSAMetadataString;
    465   if (HSAMD::toString(HSAMetadata, HSAMetadataString))
    466     return false;
    467 
    468   // Create two labels to mark the beginning and end of the desc field
    469   // and a MCExpr to calculate the size of the desc field.
    470   auto &Context = getContext();
    471   auto *DescBegin = Context.createTempSymbol();
    472   auto *DescEnd = Context.createTempSymbol();
    473   auto *DescSZ = MCBinaryExpr::createSub(
    474     MCSymbolRefExpr::create(DescEnd, Context),
    475     MCSymbolRefExpr::create(DescBegin, Context), Context);
    476 
    477   EmitAMDGPUNote(
    478     DescSZ,
    479     ELF::NT_AMD_AMDGPU_HSA_METADATA,
    480     [&](MCELFStreamer &OS) {
    481       OS.EmitLabel(DescBegin);
    482       OS.EmitBytes(HSAMetadataString);
    483       OS.EmitLabel(DescEnd);
    484     }
    485   );
    486   return true;
    487 }
    488 
    489 bool AMDGPUTargetELFStreamer::EmitPALMetadata(
    490     const PALMD::Metadata &PALMetadata) {
    491   EmitAMDGPUNote(
    492     MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t), getContext()),
    493     ELF::NT_AMD_AMDGPU_PAL_METADATA,
    494     [&](MCELFStreamer &OS){
    495       for (auto I : PALMetadata)
    496         OS.EmitIntValue(I, sizeof(uint32_t));
    497     }
    498   );
    499   return true;
    500 }
    501 
    502 void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
    503     const MCSubtargetInfo &STI, StringRef KernelName,
    504     const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
    505     uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
    506     bool ReserveXNACK) {
    507   auto &Streamer = getStreamer();
    508   auto &Context = Streamer.getContext();
    509 
    510   MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
    511       Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));
    512   KernelDescriptorSymbol->setBinding(ELF::STB_GLOBAL);
    513   KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
    514   KernelDescriptorSymbol->setSize(
    515       MCConstantExpr::create(sizeof(KernelDescriptor), Context));
    516 
    517   MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
    518       Context.getOrCreateSymbol(Twine(KernelName)));
    519   KernelCodeSymbol->setBinding(ELF::STB_LOCAL);
    520 
    521   Streamer.EmitLabel(KernelDescriptorSymbol);
    522   Streamer.EmitBytes(StringRef(
    523       (const char*)&(KernelDescriptor),
    524       offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)));
    525   // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
    526   // expression being created is:
    527   //   (start of kernel code) - (start of kernel descriptor)
    528   // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
    529   Streamer.EmitValue(MCBinaryExpr::createSub(
    530       MCSymbolRefExpr::create(
    531           KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
    532       MCSymbolRefExpr::create(
    533           KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context),
    534       Context),
    535       sizeof(KernelDescriptor.kernel_code_entry_byte_offset));
    536   Streamer.EmitBytes(StringRef(
    537       (const char*)&(KernelDescriptor) +
    538           offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) +
    539           sizeof(KernelDescriptor.kernel_code_entry_byte_offset),
    540       sizeof(KernelDescriptor) -
    541           offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) -
    542           sizeof(KernelDescriptor.kernel_code_entry_byte_offset)));
    543 }
    544