//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// The AMDGPUAsmPrinter is used to print both assembly string and also binary
/// code. When passed an MCAsmStreamer it prints assembly and when passed
/// an MCObjectStreamer it outputs binary code.
//
//===----------------------------------------------------------------------===//
//


#include "AMDGPUAsmPrinter.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"

using namespace llvm;

// TODO: This should get the default rounding mode from the kernel. We just set
// the default here, but this could change if the OpenCL rounding mode pragmas
// are used.
//
// The denormal mode here should match what is reported by the OpenCL runtime
// for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
// can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
//
// AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
// precision, and leaves single precision to flush all and does not report
// CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
// CL_FP_DENORM for both.
51 static uint32_t getFPMode(MachineFunction &) { 52 return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) | 53 FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) | 54 FP_DENORM_MODE_SP(FP_DENORM_FLUSH_NONE) | 55 FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE); 56 } 57 58 static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm, 59 MCStreamer &Streamer) { 60 return new AMDGPUAsmPrinter(tm, Streamer); 61 } 62 63 extern "C" void LLVMInitializeR600AsmPrinter() { 64 TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass); 65 } 66 67 AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) 68 : AsmPrinter(TM, Streamer) { 69 DisasmEnabled = TM.getSubtarget<AMDGPUSubtarget>().dumpCode(); 70 } 71 72 bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { 73 SetupMachineFunction(MF); 74 75 OutStreamer.emitRawComment(Twine('@') + MF.getName() + Twine(':')); 76 77 MCContext &Context = getObjFileLowering().getContext(); 78 const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config", 79 ELF::SHT_PROGBITS, 0, 80 SectionKind::getReadOnly()); 81 OutStreamer.SwitchSection(ConfigSection); 82 83 const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); 84 SIProgramInfo KernelInfo; 85 if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { 86 getSIProgramInfo(KernelInfo, MF); 87 EmitProgramInfoSI(MF, KernelInfo); 88 } else { 89 EmitProgramInfoR600(MF); 90 } 91 92 DisasmLines.clear(); 93 HexLines.clear(); 94 DisasmLineMaxLen = 0; 95 96 OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); 97 EmitFunctionBody(); 98 99 if (isVerbose()) { 100 const MCSectionELF *CommentSection 101 = Context.getELFSection(".AMDGPU.csdata", 102 ELF::SHT_PROGBITS, 0, 103 SectionKind::getReadOnly()); 104 OutStreamer.SwitchSection(CommentSection); 105 106 if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { 107 OutStreamer.emitRawComment(" Kernel info:", false); 108 OutStreamer.emitRawComment(" codeLenInByte = " + 
Twine(KernelInfo.CodeLen), 109 false); 110 OutStreamer.emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR), 111 false); 112 OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR), 113 false); 114 OutStreamer.emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode), 115 false); 116 OutStreamer.emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode), 117 false); 118 } else { 119 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 120 OutStreamer.emitRawComment( 121 Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->StackSize))); 122 } 123 } 124 125 if (STM.dumpCode()) { 126 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 127 MF.dump(); 128 #endif 129 130 if (DisasmEnabled) { 131 OutStreamer.SwitchSection(Context.getELFSection(".AMDGPU.disasm", 132 ELF::SHT_NOTE, 0, 133 SectionKind::getReadOnly())); 134 135 for (size_t i = 0; i < DisasmLines.size(); ++i) { 136 std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' '); 137 Comment += " ; " + HexLines[i] + "\n"; 138 139 OutStreamer.EmitBytes(StringRef(DisasmLines[i])); 140 OutStreamer.EmitBytes(StringRef(Comment)); 141 } 142 } 143 } 144 145 return false; 146 } 147 148 void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) { 149 unsigned MaxGPR = 0; 150 bool killPixel = false; 151 const R600RegisterInfo * RI = 152 static_cast<const R600RegisterInfo*>(TM.getRegisterInfo()); 153 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 154 const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); 155 156 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 157 BB != BB_E; ++BB) { 158 MachineBasicBlock &MBB = *BB; 159 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 160 I != E; ++I) { 161 MachineInstr &MI = *I; 162 if (MI.getOpcode() == AMDGPU::KILLGT) 163 killPixel = true; 164 unsigned numOperands = MI.getNumOperands(); 165 for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { 166 MachineOperand & MO = 
MI.getOperand(op_idx); 167 if (!MO.isReg()) 168 continue; 169 unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff; 170 171 // Register with value > 127 aren't GPR 172 if (HWReg > 127) 173 continue; 174 MaxGPR = std::max(MaxGPR, HWReg); 175 } 176 } 177 } 178 179 unsigned RsrcReg; 180 if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) { 181 // Evergreen / Northern Islands 182 switch (MFI->ShaderType) { 183 default: // Fall through 184 case ShaderType::COMPUTE: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break; 185 case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break; 186 case ShaderType::PIXEL: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break; 187 case ShaderType::VERTEX: RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break; 188 } 189 } else { 190 // R600 / R700 191 switch (MFI->ShaderType) { 192 default: // Fall through 193 case ShaderType::GEOMETRY: // Fall through 194 case ShaderType::COMPUTE: // Fall through 195 case ShaderType::VERTEX: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break; 196 case ShaderType::PIXEL: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break; 197 } 198 } 199 200 OutStreamer.EmitIntValue(RsrcReg, 4); 201 OutStreamer.EmitIntValue(S_NUM_GPRS(MaxGPR + 1) | 202 S_STACK_SIZE(MFI->StackSize), 4); 203 OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4); 204 OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4); 205 206 if (MFI->ShaderType == ShaderType::COMPUTE) { 207 OutStreamer.EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4); 208 OutStreamer.EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4); 209 } 210 } 211 212 void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, 213 MachineFunction &MF) const { 214 uint64_t CodeSize = 0; 215 unsigned MaxSGPR = 0; 216 unsigned MaxVGPR = 0; 217 bool VCCUsed = false; 218 const SIRegisterInfo * RI = 219 static_cast<const SIRegisterInfo*>(TM.getRegisterInfo()); 220 221 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 222 BB != BB_E; ++BB) { 223 MachineBasicBlock &MBB = 
*BB; 224 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 225 I != E; ++I) { 226 MachineInstr &MI = *I; 227 228 // TODO: CodeSize should account for multiple functions. 229 CodeSize += MI.getDesc().Size; 230 231 unsigned numOperands = MI.getNumOperands(); 232 for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { 233 MachineOperand &MO = MI.getOperand(op_idx); 234 unsigned width = 0; 235 bool isSGPR = false; 236 237 if (!MO.isReg()) { 238 continue; 239 } 240 unsigned reg = MO.getReg(); 241 if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO || 242 reg == AMDGPU::VCC_HI) { 243 VCCUsed = true; 244 continue; 245 } 246 247 switch (reg) { 248 default: break; 249 case AMDGPU::SCC: 250 case AMDGPU::EXEC: 251 case AMDGPU::M0: 252 continue; 253 } 254 255 if (AMDGPU::SReg_32RegClass.contains(reg)) { 256 isSGPR = true; 257 width = 1; 258 } else if (AMDGPU::VReg_32RegClass.contains(reg)) { 259 isSGPR = false; 260 width = 1; 261 } else if (AMDGPU::SReg_64RegClass.contains(reg)) { 262 isSGPR = true; 263 width = 2; 264 } else if (AMDGPU::VReg_64RegClass.contains(reg)) { 265 isSGPR = false; 266 width = 2; 267 } else if (AMDGPU::VReg_96RegClass.contains(reg)) { 268 isSGPR = false; 269 width = 3; 270 } else if (AMDGPU::SReg_128RegClass.contains(reg)) { 271 isSGPR = true; 272 width = 4; 273 } else if (AMDGPU::VReg_128RegClass.contains(reg)) { 274 isSGPR = false; 275 width = 4; 276 } else if (AMDGPU::SReg_256RegClass.contains(reg)) { 277 isSGPR = true; 278 width = 8; 279 } else if (AMDGPU::VReg_256RegClass.contains(reg)) { 280 isSGPR = false; 281 width = 8; 282 } else if (AMDGPU::SReg_512RegClass.contains(reg)) { 283 isSGPR = true; 284 width = 16; 285 } else if (AMDGPU::VReg_512RegClass.contains(reg)) { 286 isSGPR = false; 287 width = 16; 288 } else { 289 llvm_unreachable("Unknown register class"); 290 } 291 unsigned hwReg = RI->getEncodingValue(reg) & 0xff; 292 unsigned maxUsed = hwReg + width - 1; 293 if (isSGPR) { 294 MaxSGPR = maxUsed > MaxSGPR ? 
maxUsed : MaxSGPR; 295 } else { 296 MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR; 297 } 298 } 299 } 300 } 301 302 if (VCCUsed) 303 MaxSGPR += 2; 304 305 ProgInfo.NumVGPR = MaxVGPR; 306 ProgInfo.NumSGPR = MaxSGPR; 307 308 // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode 309 // register. 310 ProgInfo.FloatMode = getFPMode(MF); 311 312 // XXX: Not quite sure what this does, but sc seems to unset this. 313 ProgInfo.IEEEMode = 0; 314 315 // Do not clamp NAN to 0. 316 ProgInfo.DX10Clamp = 0; 317 318 ProgInfo.CodeLen = CodeSize; 319 } 320 321 void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF, 322 const SIProgramInfo &KernelInfo) { 323 const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); 324 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 325 326 unsigned RsrcReg; 327 switch (MFI->ShaderType) { 328 default: // Fall through 329 case ShaderType::COMPUTE: RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break; 330 case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break; 331 case ShaderType::PIXEL: RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break; 332 case ShaderType::VERTEX: RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break; 333 } 334 335 unsigned LDSAlignShift; 336 if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) { 337 // LDS is allocated in 64 dword blocks. 338 LDSAlignShift = 8; 339 } else { 340 // LDS is allocated in 128 dword blocks. 
341 LDSAlignShift = 9; 342 } 343 344 unsigned LDSBlocks = 345 RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift; 346 347 if (MFI->ShaderType == ShaderType::COMPUTE) { 348 OutStreamer.EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4); 349 350 const uint32_t ComputePGMRSrc1 = 351 S_00B848_VGPRS(KernelInfo.NumVGPR / 4) | 352 S_00B848_SGPRS(KernelInfo.NumSGPR / 8) | 353 S_00B848_PRIORITY(KernelInfo.Priority) | 354 S_00B848_FLOAT_MODE(KernelInfo.FloatMode) | 355 S_00B848_PRIV(KernelInfo.Priv) | 356 S_00B848_DX10_CLAMP(KernelInfo.DX10Clamp) | 357 S_00B848_IEEE_MODE(KernelInfo.DebugMode) | 358 S_00B848_IEEE_MODE(KernelInfo.IEEEMode); 359 360 OutStreamer.EmitIntValue(ComputePGMRSrc1, 4); 361 362 OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4); 363 OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4); 364 } else { 365 OutStreamer.EmitIntValue(RsrcReg, 4); 366 OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) | 367 S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4); 368 } 369 370 if (MFI->ShaderType == ShaderType::PIXEL) { 371 OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4); 372 OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4); 373 OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4); 374 OutStreamer.EmitIntValue(MFI->PSInputAddr, 4); 375 } 376 } 377