1 //===-- AMDGPUTargetStreamer.cpp - Mips Target Streamer Methods -----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file provides AMDGPU specific target streamer methods. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPUTargetStreamer.h" 15 #include "SIDefines.h" 16 #include "Utils/AMDGPUBaseInfo.h" 17 #include "llvm/ADT/Twine.h" 18 #include "llvm/MC/MCContext.h" 19 #include "llvm/MC/MCELFStreamer.h" 20 #include "llvm/MC/MCObjectFileInfo.h" 21 #include "llvm/MC/MCSectionELF.h" 22 #include "llvm/Support/ELF.h" 23 #include "llvm/Support/FormattedStream.h" 24 25 using namespace llvm; 26 27 AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S) 28 : MCTargetStreamer(S) { } 29 30 //===----------------------------------------------------------------------===// 31 // AMDGPUTargetAsmStreamer 32 //===----------------------------------------------------------------------===// 33 34 AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S, 35 formatted_raw_ostream &OS) 36 : AMDGPUTargetStreamer(S), OS(OS) { } 37 38 void 39 AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major, 40 uint32_t Minor) { 41 OS << "\t.hsa_code_object_version " << 42 Twine(Major) << "," << Twine(Minor) << '\n'; 43 } 44 45 void 46 AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major, 47 uint32_t Minor, 48 uint32_t Stepping, 49 StringRef VendorName, 50 StringRef ArchName) { 51 OS << "\t.hsa_code_object_isa " << 52 Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) << 53 ",\"" << VendorName << "\",\"" << ArchName << "\"\n"; 54 55 } 56 57 void 58 AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { 59 uint64_t ComputePgmRsrc2 = (Header.compute_pgm_resource_registers >> 32); 60 bool EnableSGPRPrivateSegmentBuffer = (Header.code_properties & 61 AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); 62 bool EnableSGPRDispatchPtr = (Header.code_properties & 63 AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR); 64 bool EnableSGPRQueuePtr = (Header.code_properties & 65 AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR); 66 bool EnableSGPRKernargSegmentPtr = (Header.code_properties & 67 AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR); 68 bool EnableSGPRDispatchID = (Header.code_properties & 69 AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID); 70 bool EnableSGPRFlatScratchInit = (Header.code_properties & 71 AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); 72 bool EnableSGPRPrivateSegmentSize = (Header.code_properties & 73 AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); 74 bool EnableSGPRGridWorkgroupCountX = (Header.code_properties & 75 AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X); 76 bool EnableSGPRGridWorkgroupCountY = (Header.code_properties & 77 AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y); 78 bool EnableSGPRGridWorkgroupCountZ = (Header.code_properties & 79 AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z); 80 bool EnableOrderedAppendGDS = (Header.code_properties & 81 AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS); 82 uint32_t PrivateElementSize = (Header.code_properties & 83 AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE) >> 84 AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT; 85 bool IsPtr64 = (Header.code_properties & AMD_CODE_PROPERTY_IS_PTR64); 86 bool IsDynamicCallstack = (Header.code_properties & 87 AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK); 88 bool IsDebugEnabled = (Header.code_properties & 89 AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED); 90 bool IsXNackEnabled = (Header.code_properties & 91 AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED); 92 93 OS << "\t.amd_kernel_code_t\n" << 94 "\t\tkernel_code_version_major = " << 95 Header.amd_kernel_code_version_major << '\n' << 96 "\t\tkernel_code_version_minor = " << 97 Header.amd_kernel_code_version_minor << '\n' << 98 "\t\tmachine_kind = " << 99 Header.amd_machine_kind << '\n' << 100 "\t\tmachine_version_major = " << 101 Header.amd_machine_version_major << '\n' << 102 "\t\tmachine_version_minor = " << 103 Header.amd_machine_version_minor << '\n' << 104 "\t\tmachine_version_stepping = " << 105 Header.amd_machine_version_stepping << '\n' << 106 "\t\tkernel_code_entry_byte_offset = " << 107 Header.kernel_code_entry_byte_offset << '\n' << 108 "\t\tkernel_code_prefetch_byte_size = " << 109 Header.kernel_code_prefetch_byte_size << '\n' << 110 "\t\tmax_scratch_backing_memory_byte_size = " << 111 Header.max_scratch_backing_memory_byte_size << '\n' << 112 "\t\tcompute_pgm_rsrc1_vgprs = " << 113 G_00B848_VGPRS(Header.compute_pgm_resource_registers) << '\n' << 114 "\t\tcompute_pgm_rsrc1_sgprs = " << 115 G_00B848_SGPRS(Header.compute_pgm_resource_registers) << '\n' << 116 "\t\tcompute_pgm_rsrc1_priority = " << 117 G_00B848_PRIORITY(Header.compute_pgm_resource_registers) << '\n' << 118 "\t\tcompute_pgm_rsrc1_float_mode = " << 119 G_00B848_FLOAT_MODE(Header.compute_pgm_resource_registers) << '\n' << 120 "\t\tcompute_pgm_rsrc1_priv = " << 121 G_00B848_PRIV(Header.compute_pgm_resource_registers) << '\n' << 122 "\t\tcompute_pgm_rsrc1_dx10_clamp = " << 123 G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) << '\n' << 124 "\t\tcompute_pgm_rsrc1_debug_mode = " << 125 G_00B848_DEBUG_MODE(Header.compute_pgm_resource_registers) << '\n' << 126 "\t\tcompute_pgm_rsrc1_ieee_mode = " << 127 G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) << '\n' << 128 "\t\tcompute_pgm_rsrc2_scratch_en = " << 129 G_00B84C_SCRATCH_EN(ComputePgmRsrc2) << '\n' << 130 "\t\tcompute_pgm_rsrc2_user_sgpr = " << 131 G_00B84C_USER_SGPR(ComputePgmRsrc2) << '\n' << 132 "\t\tcompute_pgm_rsrc2_tgid_x_en = " << 133 G_00B84C_TGID_X_EN(ComputePgmRsrc2) << '\n' << 134 "\t\tcompute_pgm_rsrc2_tgid_y_en = " << 135 G_00B84C_TGID_Y_EN(ComputePgmRsrc2) << '\n' << 136 "\t\tcompute_pgm_rsrc2_tgid_z_en = " << 137 G_00B84C_TGID_Z_EN(ComputePgmRsrc2) << '\n' << 138 "\t\tcompute_pgm_rsrc2_tg_size_en = " << 139 G_00B84C_TG_SIZE_EN(ComputePgmRsrc2) << '\n' << 140 "\t\tcompute_pgm_rsrc2_tidig_comp_cnt = " << 141 G_00B84C_TIDIG_COMP_CNT(ComputePgmRsrc2) << '\n' << 142 "\t\tcompute_pgm_rsrc2_excp_en_msb = " << 143 G_00B84C_EXCP_EN_MSB(ComputePgmRsrc2) << '\n' << 144 "\t\tcompute_pgm_rsrc2_lds_size = " << 145 G_00B84C_LDS_SIZE(ComputePgmRsrc2) << '\n' << 146 "\t\tcompute_pgm_rsrc2_excp_en = " << 147 G_00B84C_EXCP_EN(ComputePgmRsrc2) << '\n' << 148 149 "\t\tenable_sgpr_private_segment_buffer = " << 150 EnableSGPRPrivateSegmentBuffer << '\n' << 151 "\t\tenable_sgpr_dispatch_ptr = " << 152 EnableSGPRDispatchPtr << '\n' << 153 "\t\tenable_sgpr_queue_ptr = " << 154 EnableSGPRQueuePtr << '\n' << 155 "\t\tenable_sgpr_kernarg_segment_ptr = " << 156 EnableSGPRKernargSegmentPtr << '\n' << 157 "\t\tenable_sgpr_dispatch_id = " << 158 EnableSGPRDispatchID << '\n' << 159 "\t\tenable_sgpr_flat_scratch_init = " << 160 EnableSGPRFlatScratchInit << '\n' << 161 "\t\tenable_sgpr_private_segment_size = " << 162 EnableSGPRPrivateSegmentSize << '\n' << 163 "\t\tenable_sgpr_grid_workgroup_count_x = " << 164 EnableSGPRGridWorkgroupCountX << '\n' << 165 "\t\tenable_sgpr_grid_workgroup_count_y = " << 166 EnableSGPRGridWorkgroupCountY << '\n' << 167 "\t\tenable_sgpr_grid_workgroup_count_z = " << 168 EnableSGPRGridWorkgroupCountZ << '\n' << 169 "\t\tenable_ordered_append_gds = " << 170 EnableOrderedAppendGDS << '\n' << 171 "\t\tprivate_element_size = " << 172 PrivateElementSize << '\n' << 173 "\t\tis_ptr64 = " << 174 IsPtr64 << '\n' << 175 "\t\tis_dynamic_callstack = " << 176 IsDynamicCallstack << '\n' << 177 "\t\tis_debug_enabled = " << 178 IsDebugEnabled << '\n' << 179 "\t\tis_xnack_enabled = " << 180 IsXNackEnabled << '\n' << 181 "\t\tworkitem_private_segment_byte_size = " << 182 Header.workitem_private_segment_byte_size << '\n' << 183 "\t\tworkgroup_group_segment_byte_size = " << 184 Header.workgroup_group_segment_byte_size << '\n' << 185 "\t\tgds_segment_byte_size = " << 186 Header.gds_segment_byte_size << '\n' << 187 "\t\tkernarg_segment_byte_size = " << 188 Header.kernarg_segment_byte_size << '\n' << 189 "\t\tworkgroup_fbarrier_count = " << 190 Header.workgroup_fbarrier_count << '\n' << 191 "\t\twavefront_sgpr_count = " << 192 Header.wavefront_sgpr_count << '\n' << 193 "\t\tworkitem_vgpr_count = " << 194 Header.workitem_vgpr_count << '\n' << 195 "\t\treserved_vgpr_first = " << 196 Header.reserved_vgpr_first << '\n' << 197 "\t\treserved_vgpr_count = " << 198 Header.reserved_vgpr_count << '\n' << 199 "\t\treserved_sgpr_first = " << 200 Header.reserved_sgpr_first << '\n' << 201 "\t\treserved_sgpr_count = " << 202 Header.reserved_sgpr_count << '\n' << 203 "\t\tdebug_wavefront_private_segment_offset_sgpr = " << 204 Header.debug_wavefront_private_segment_offset_sgpr << '\n' << 205 "\t\tdebug_private_segment_buffer_sgpr = " << 206 Header.debug_private_segment_buffer_sgpr << '\n' << 207 "\t\tkernarg_segment_alignment = " << 208 (uint32_t)Header.kernarg_segment_alignment << '\n' << 209 "\t\tgroup_segment_alignment = " << 210 (uint32_t)Header.group_segment_alignment << '\n' << 211 "\t\tprivate_segment_alignment = " << 212 (uint32_t)Header.private_segment_alignment << '\n' << 213 "\t\twavefront_size = " << 214 (uint32_t)Header.wavefront_size << '\n' << 215 "\t\tcall_convention = " << 216 Header.call_convention << '\n' << 217 "\t\truntime_loader_kernel_symbol = " << 218 Header.runtime_loader_kernel_symbol << '\n' << 219 // TODO: control_directives 220 "\t.end_amd_kernel_code_t\n"; 221 222 } 223 224 void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, 225 unsigned Type) { 226 switch (Type) { 227 default: llvm_unreachable("Invalid AMDGPU symbol type"); 228 case ELF::STT_AMDGPU_HSA_KERNEL: 229 OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ; 230 break; 231 } 232 } 233 234 void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaModuleScopeGlobal( 235 StringRef GlobalName) { 236 OS << "\t.amdgpu_hsa_module_global " << GlobalName << '\n'; 237 } 238 239 void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal( 240 StringRef GlobalName) { 241 OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n'; 242 } 243 244 //===----------------------------------------------------------------------===// 245 // AMDGPUTargetELFStreamer 246 //===----------------------------------------------------------------------===// 247 248 AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S) 249 : AMDGPUTargetStreamer(S), Streamer(S) { } 250 251 MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() { 252 return static_cast<MCELFStreamer &>(Streamer); 253 } 254 255 void 256 AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major, 257 uint32_t Minor) { 258 MCStreamer &OS = getStreamer(); 259 MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0); 260 261 unsigned NameSZ = 4; 262 263 OS.PushSection(); 264 OS.SwitchSection(Note); 265 OS.EmitIntValue(NameSZ, 4); // namesz 266 OS.EmitIntValue(8, 4); // descz 267 OS.EmitIntValue(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, 4); // type 268 OS.EmitBytes(StringRef("AMD", NameSZ)); // name 269 OS.EmitIntValue(Major, 4); // desc 270 OS.EmitIntValue(Minor, 4); 271 OS.EmitValueToAlignment(4); 272 OS.PopSection(); 273 } 274 275 void 276 AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major, 277 uint32_t Minor, 278 uint32_t Stepping, 279 StringRef VendorName, 280 StringRef ArchName) { 281 MCStreamer &OS = getStreamer(); 282 MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0); 283 284 unsigned NameSZ = 4; 285 uint16_t VendorNameSize = VendorName.size() + 1; 286 uint16_t ArchNameSize = ArchName.size() + 1; 287 unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) + 288 sizeof(Major) + sizeof(Minor) + sizeof(Stepping) + 289 VendorNameSize + ArchNameSize; 290 291 OS.PushSection(); 292 OS.SwitchSection(Note); 293 OS.EmitIntValue(NameSZ, 4); // namesz 294 OS.EmitIntValue(DescSZ, 4); // descsz 295 OS.EmitIntValue(NT_AMDGPU_HSA_ISA, 4); // type 296 OS.EmitBytes(StringRef("AMD", 4)); // name 297 OS.EmitIntValue(VendorNameSize, 2); // desc 298 OS.EmitIntValue(ArchNameSize, 2); 299 OS.EmitIntValue(Major, 4); 300 OS.EmitIntValue(Minor, 4); 301 OS.EmitIntValue(Stepping, 4); 302 OS.EmitBytes(VendorName); 303 OS.EmitIntValue(0, 1); // NULL terminate VendorName 304 OS.EmitBytes(ArchName); 305 OS.EmitIntValue(0, 1); // NULL terminte ArchName 306 OS.EmitValueToAlignment(4); 307 OS.PopSection(); 308 } 309 310 void 311 AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { 312 313 MCStreamer &OS = getStreamer(); 314 OS.PushSection(); 315 // The MCObjectFileInfo that is available to the assembler is a generic 316 // implementation and not AMDGPUHSATargetObjectFile, so we can't use 317 // MCObjectFileInfo::getTextSection() here for fetching the HSATextSection. 318 OS.SwitchSection(AMDGPU::getHSATextSection(OS.getContext())); 319 OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header))); 320 OS.PopSection(); 321 } 322 323 void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, 324 unsigned Type) { 325 MCSymbolELF *Symbol = cast<MCSymbolELF>( 326 getStreamer().getContext().getOrCreateSymbol(SymbolName)); 327 Symbol->setType(ELF::STT_AMDGPU_HSA_KERNEL); 328 } 329 330 void AMDGPUTargetELFStreamer::EmitAMDGPUHsaModuleScopeGlobal( 331 StringRef GlobalName) { 332 333 MCSymbolELF *Symbol = cast<MCSymbolELF>( 334 getStreamer().getContext().getOrCreateSymbol(GlobalName)); 335 Symbol->setType(ELF::STT_OBJECT); 336 Symbol->setBinding(ELF::STB_LOCAL); 337 } 338 339 void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal( 340 StringRef GlobalName) { 341 342 MCSymbolELF *Symbol = cast<MCSymbolELF>( 343 getStreamer().getContext().getOrCreateSymbol(GlobalName)); 344 Symbol->setType(ELF::STT_OBJECT); 345 Symbol->setBinding(ELF::STB_GLOBAL); 346 } 347