Home | History | Annotate | Download | only in MCTargetDesc
      1 //===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file provides AMDGPU specific target streamer methods.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "AMDGPUTargetStreamer.h"
     15 #include "SIDefines.h"
     16 #include "Utils/AMDGPUBaseInfo.h"
     17 #include "llvm/ADT/Twine.h"
     18 #include "llvm/MC/MCContext.h"
     19 #include "llvm/MC/MCELFStreamer.h"
     20 #include "llvm/MC/MCObjectFileInfo.h"
     21 #include "llvm/MC/MCSectionELF.h"
     22 #include "llvm/Support/ELF.h"
     23 #include "llvm/Support/FormattedStream.h"
     24 
     25 using namespace llvm;
     26 
     27 AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S)
     28     : MCTargetStreamer(S) { }
     29 
     30 //===----------------------------------------------------------------------===//
     31 // AMDGPUTargetAsmStreamer
     32 //===----------------------------------------------------------------------===//
     33 
     34 AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
     35                                                  formatted_raw_ostream &OS)
     36     : AMDGPUTargetStreamer(S), OS(OS) { }
     37 
     38 void
     39 AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
     40                                                            uint32_t Minor) {
     41   OS << "\t.hsa_code_object_version " <<
     42         Twine(Major) << "," << Twine(Minor) << '\n';
     43 }
     44 
     45 void
     46 AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
     47                                                        uint32_t Minor,
     48                                                        uint32_t Stepping,
     49                                                        StringRef VendorName,
     50                                                        StringRef ArchName) {
     51   OS << "\t.hsa_code_object_isa " <<
     52         Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
     53         ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
     54 
     55 }
     56 
     57 void
     58 AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
     59   uint64_t ComputePgmRsrc2 = (Header.compute_pgm_resource_registers >> 32);
     60   bool EnableSGPRPrivateSegmentBuffer = (Header.code_properties &
     61       AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
     62   bool EnableSGPRDispatchPtr = (Header.code_properties &
     63       AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
     64   bool EnableSGPRQueuePtr = (Header.code_properties &
     65       AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
     66   bool EnableSGPRKernargSegmentPtr = (Header.code_properties &
     67       AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
     68   bool EnableSGPRDispatchID = (Header.code_properties &
     69       AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
     70   bool EnableSGPRFlatScratchInit = (Header.code_properties &
     71       AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
     72   bool EnableSGPRPrivateSegmentSize = (Header.code_properties &
     73       AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
     74   bool EnableSGPRGridWorkgroupCountX = (Header.code_properties &
     75       AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X);
     76   bool EnableSGPRGridWorkgroupCountY = (Header.code_properties &
     77       AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y);
     78   bool EnableSGPRGridWorkgroupCountZ = (Header.code_properties &
     79       AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z);
     80   bool EnableOrderedAppendGDS = (Header.code_properties &
     81       AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS);
     82   uint32_t PrivateElementSize = (Header.code_properties &
     83       AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE) >>
     84           AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT;
     85   bool IsPtr64 = (Header.code_properties & AMD_CODE_PROPERTY_IS_PTR64);
     86   bool IsDynamicCallstack = (Header.code_properties &
     87       AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK);
     88   bool IsDebugEnabled = (Header.code_properties &
     89       AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED);
     90   bool IsXNackEnabled = (Header.code_properties &
     91       AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED);
     92 
     93   OS << "\t.amd_kernel_code_t\n" <<
     94     "\t\tkernel_code_version_major = " <<
     95         Header.amd_kernel_code_version_major << '\n' <<
     96     "\t\tkernel_code_version_minor = " <<
     97         Header.amd_kernel_code_version_minor << '\n' <<
     98     "\t\tmachine_kind = " <<
     99         Header.amd_machine_kind << '\n' <<
    100     "\t\tmachine_version_major = " <<
    101         Header.amd_machine_version_major << '\n' <<
    102     "\t\tmachine_version_minor = " <<
    103         Header.amd_machine_version_minor << '\n' <<
    104     "\t\tmachine_version_stepping = " <<
    105         Header.amd_machine_version_stepping << '\n' <<
    106     "\t\tkernel_code_entry_byte_offset = " <<
    107         Header.kernel_code_entry_byte_offset << '\n' <<
    108     "\t\tkernel_code_prefetch_byte_size = " <<
    109         Header.kernel_code_prefetch_byte_size << '\n' <<
    110     "\t\tmax_scratch_backing_memory_byte_size = " <<
    111         Header.max_scratch_backing_memory_byte_size << '\n' <<
    112     "\t\tcompute_pgm_rsrc1_vgprs = " <<
    113         G_00B848_VGPRS(Header.compute_pgm_resource_registers) << '\n' <<
    114     "\t\tcompute_pgm_rsrc1_sgprs = " <<
    115         G_00B848_SGPRS(Header.compute_pgm_resource_registers) << '\n' <<
    116     "\t\tcompute_pgm_rsrc1_priority = " <<
    117         G_00B848_PRIORITY(Header.compute_pgm_resource_registers) << '\n' <<
    118     "\t\tcompute_pgm_rsrc1_float_mode = " <<
    119         G_00B848_FLOAT_MODE(Header.compute_pgm_resource_registers) << '\n' <<
    120     "\t\tcompute_pgm_rsrc1_priv = " <<
    121         G_00B848_PRIV(Header.compute_pgm_resource_registers) << '\n' <<
    122     "\t\tcompute_pgm_rsrc1_dx10_clamp = " <<
    123         G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) << '\n' <<
    124     "\t\tcompute_pgm_rsrc1_debug_mode = " <<
    125         G_00B848_DEBUG_MODE(Header.compute_pgm_resource_registers) << '\n' <<
    126     "\t\tcompute_pgm_rsrc1_ieee_mode = " <<
    127         G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) << '\n' <<
    128     "\t\tcompute_pgm_rsrc2_scratch_en = " <<
    129         G_00B84C_SCRATCH_EN(ComputePgmRsrc2) << '\n' <<
    130     "\t\tcompute_pgm_rsrc2_user_sgpr = " <<
    131         G_00B84C_USER_SGPR(ComputePgmRsrc2) << '\n' <<
    132     "\t\tcompute_pgm_rsrc2_tgid_x_en = " <<
    133         G_00B84C_TGID_X_EN(ComputePgmRsrc2) << '\n' <<
    134     "\t\tcompute_pgm_rsrc2_tgid_y_en = " <<
    135         G_00B84C_TGID_Y_EN(ComputePgmRsrc2) << '\n' <<
    136     "\t\tcompute_pgm_rsrc2_tgid_z_en = " <<
    137         G_00B84C_TGID_Z_EN(ComputePgmRsrc2) << '\n' <<
    138     "\t\tcompute_pgm_rsrc2_tg_size_en = " <<
    139         G_00B84C_TG_SIZE_EN(ComputePgmRsrc2) << '\n' <<
    140     "\t\tcompute_pgm_rsrc2_tidig_comp_cnt = " <<
    141         G_00B84C_TIDIG_COMP_CNT(ComputePgmRsrc2) << '\n' <<
    142     "\t\tcompute_pgm_rsrc2_excp_en_msb = " <<
    143         G_00B84C_EXCP_EN_MSB(ComputePgmRsrc2) << '\n' <<
    144     "\t\tcompute_pgm_rsrc2_lds_size = " <<
    145         G_00B84C_LDS_SIZE(ComputePgmRsrc2) << '\n' <<
    146     "\t\tcompute_pgm_rsrc2_excp_en = " <<
    147         G_00B84C_EXCP_EN(ComputePgmRsrc2) << '\n' <<
    148 
    149     "\t\tenable_sgpr_private_segment_buffer = " <<
    150         EnableSGPRPrivateSegmentBuffer << '\n' <<
    151     "\t\tenable_sgpr_dispatch_ptr = " <<
    152         EnableSGPRDispatchPtr << '\n' <<
    153     "\t\tenable_sgpr_queue_ptr = " <<
    154         EnableSGPRQueuePtr << '\n' <<
    155     "\t\tenable_sgpr_kernarg_segment_ptr = " <<
    156         EnableSGPRKernargSegmentPtr << '\n' <<
    157     "\t\tenable_sgpr_dispatch_id = " <<
    158         EnableSGPRDispatchID << '\n' <<
    159     "\t\tenable_sgpr_flat_scratch_init = " <<
    160         EnableSGPRFlatScratchInit << '\n' <<
    161     "\t\tenable_sgpr_private_segment_size = " <<
    162         EnableSGPRPrivateSegmentSize << '\n' <<
    163     "\t\tenable_sgpr_grid_workgroup_count_x = " <<
    164         EnableSGPRGridWorkgroupCountX << '\n' <<
    165     "\t\tenable_sgpr_grid_workgroup_count_y = " <<
    166         EnableSGPRGridWorkgroupCountY << '\n' <<
    167     "\t\tenable_sgpr_grid_workgroup_count_z = " <<
    168         EnableSGPRGridWorkgroupCountZ << '\n' <<
    169     "\t\tenable_ordered_append_gds = " <<
    170         EnableOrderedAppendGDS << '\n' <<
    171     "\t\tprivate_element_size = " <<
    172         PrivateElementSize << '\n' <<
    173     "\t\tis_ptr64 = " <<
    174         IsPtr64 << '\n' <<
    175     "\t\tis_dynamic_callstack = " <<
    176         IsDynamicCallstack << '\n' <<
    177     "\t\tis_debug_enabled = " <<
    178         IsDebugEnabled << '\n' <<
    179     "\t\tis_xnack_enabled = " <<
    180         IsXNackEnabled << '\n' <<
    181     "\t\tworkitem_private_segment_byte_size = " <<
    182         Header.workitem_private_segment_byte_size << '\n' <<
    183     "\t\tworkgroup_group_segment_byte_size = " <<
    184         Header.workgroup_group_segment_byte_size << '\n' <<
    185     "\t\tgds_segment_byte_size = " <<
    186         Header.gds_segment_byte_size << '\n' <<
    187     "\t\tkernarg_segment_byte_size = " <<
    188         Header.kernarg_segment_byte_size << '\n' <<
    189     "\t\tworkgroup_fbarrier_count = " <<
    190         Header.workgroup_fbarrier_count << '\n' <<
    191     "\t\twavefront_sgpr_count = " <<
    192         Header.wavefront_sgpr_count << '\n' <<
    193     "\t\tworkitem_vgpr_count = " <<
    194         Header.workitem_vgpr_count << '\n' <<
    195     "\t\treserved_vgpr_first = " <<
    196         Header.reserved_vgpr_first << '\n' <<
    197     "\t\treserved_vgpr_count = " <<
    198         Header.reserved_vgpr_count << '\n' <<
    199     "\t\treserved_sgpr_first = " <<
    200         Header.reserved_sgpr_first << '\n' <<
    201     "\t\treserved_sgpr_count = " <<
    202         Header.reserved_sgpr_count << '\n' <<
    203     "\t\tdebug_wavefront_private_segment_offset_sgpr = " <<
    204         Header.debug_wavefront_private_segment_offset_sgpr << '\n' <<
    205     "\t\tdebug_private_segment_buffer_sgpr = " <<
    206         Header.debug_private_segment_buffer_sgpr << '\n' <<
    207     "\t\tkernarg_segment_alignment = " <<
    208         (uint32_t)Header.kernarg_segment_alignment << '\n' <<
    209     "\t\tgroup_segment_alignment = " <<
    210         (uint32_t)Header.group_segment_alignment << '\n' <<
    211     "\t\tprivate_segment_alignment = " <<
    212         (uint32_t)Header.private_segment_alignment << '\n' <<
    213     "\t\twavefront_size = " <<
    214         (uint32_t)Header.wavefront_size << '\n' <<
    215     "\t\tcall_convention = " <<
    216         Header.call_convention << '\n' <<
    217     "\t\truntime_loader_kernel_symbol = " <<
    218         Header.runtime_loader_kernel_symbol << '\n' <<
    219     // TODO: control_directives
    220     "\t.end_amd_kernel_code_t\n";
    221 
    222 }
    223 
    224 void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
    225                                                    unsigned Type) {
    226   switch (Type) {
    227     default: llvm_unreachable("Invalid AMDGPU symbol type");
    228     case ELF::STT_AMDGPU_HSA_KERNEL:
    229       OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
    230       break;
    231   }
    232 }
    233 
    234 void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaModuleScopeGlobal(
    235     StringRef GlobalName) {
    236   OS << "\t.amdgpu_hsa_module_global " << GlobalName << '\n';
    237 }
    238 
    239 void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal(
    240     StringRef GlobalName) {
    241   OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
    242 }
    243 
    244 //===----------------------------------------------------------------------===//
    245 // AMDGPUTargetELFStreamer
    246 //===----------------------------------------------------------------------===//
    247 
    248 AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S)
    249     : AMDGPUTargetStreamer(S), Streamer(S) { }
    250 
    251 MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
    252   return static_cast<MCELFStreamer &>(Streamer);
    253 }
    254 
    255 void
    256 AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
    257                                                            uint32_t Minor) {
    258   MCStreamer &OS = getStreamer();
    259   MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
    260 
    261   unsigned NameSZ = 4;
    262 
    263   OS.PushSection();
    264   OS.SwitchSection(Note);
    265   OS.EmitIntValue(NameSZ, 4);                            // namesz
    266   OS.EmitIntValue(8, 4);                                 // descz
    267   OS.EmitIntValue(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, 4); // type
    268   OS.EmitBytes(StringRef("AMD", NameSZ));                // name
    269   OS.EmitIntValue(Major, 4);                             // desc
    270   OS.EmitIntValue(Minor, 4);
    271   OS.EmitValueToAlignment(4);
    272   OS.PopSection();
    273 }
    274 
    275 void
    276 AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
    277                                                        uint32_t Minor,
    278                                                        uint32_t Stepping,
    279                                                        StringRef VendorName,
    280                                                        StringRef ArchName) {
    281   MCStreamer &OS = getStreamer();
    282   MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
    283 
    284   unsigned NameSZ = 4;
    285   uint16_t VendorNameSize = VendorName.size() + 1;
    286   uint16_t ArchNameSize = ArchName.size() + 1;
    287   unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
    288                     sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
    289                     VendorNameSize + ArchNameSize;
    290 
    291   OS.PushSection();
    292   OS.SwitchSection(Note);
    293   OS.EmitIntValue(NameSZ, 4);                            // namesz
    294   OS.EmitIntValue(DescSZ, 4);                            // descsz
    295   OS.EmitIntValue(NT_AMDGPU_HSA_ISA, 4);                 // type
    296   OS.EmitBytes(StringRef("AMD", 4));                     // name
    297   OS.EmitIntValue(VendorNameSize, 2);                    // desc
    298   OS.EmitIntValue(ArchNameSize, 2);
    299   OS.EmitIntValue(Major, 4);
    300   OS.EmitIntValue(Minor, 4);
    301   OS.EmitIntValue(Stepping, 4);
    302   OS.EmitBytes(VendorName);
    303   OS.EmitIntValue(0, 1); // NULL terminate VendorName
    304   OS.EmitBytes(ArchName);
    305   OS.EmitIntValue(0, 1); // NULL terminte ArchName
    306   OS.EmitValueToAlignment(4);
    307   OS.PopSection();
    308 }
    309 
    310 void
    311 AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
    312 
    313   MCStreamer &OS = getStreamer();
    314   OS.PushSection();
    315   // The MCObjectFileInfo that is available to the assembler is a generic
    316   // implementation and not AMDGPUHSATargetObjectFile, so we can't use
    317   // MCObjectFileInfo::getTextSection() here for fetching the HSATextSection.
    318   OS.SwitchSection(AMDGPU::getHSATextSection(OS.getContext()));
    319   OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
    320   OS.PopSection();
    321 }
    322 
    323 void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
    324                                                    unsigned Type) {
    325   MCSymbolELF *Symbol = cast<MCSymbolELF>(
    326       getStreamer().getContext().getOrCreateSymbol(SymbolName));
    327   Symbol->setType(ELF::STT_AMDGPU_HSA_KERNEL);
    328 }
    329 
    330 void AMDGPUTargetELFStreamer::EmitAMDGPUHsaModuleScopeGlobal(
    331     StringRef GlobalName) {
    332 
    333   MCSymbolELF *Symbol = cast<MCSymbolELF>(
    334       getStreamer().getContext().getOrCreateSymbol(GlobalName));
    335   Symbol->setType(ELF::STT_OBJECT);
    336   Symbol->setBinding(ELF::STB_LOCAL);
    337 }
    338 
    339 void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal(
    340     StringRef GlobalName) {
    341 
    342   MCSymbolELF *Symbol = cast<MCSymbolELF>(
    343       getStreamer().getContext().getOrCreateSymbol(GlobalName));
    344   Symbol->setType(ELF::STT_OBJECT);
    345   Symbol->setBinding(ELF::STB_GLOBAL);
    346 }
    347