Home | History | Annotate | Download | only in AMDGPU
      1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// AMDGPU HSA Metadata Streamer.
     12 ///
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "AMDGPUHSAMetadataStreamer.h"
     17 #include "AMDGPU.h"
     18 #include "AMDGPUSubtarget.h"
     19 #include "SIMachineFunctionInfo.h"
     20 #include "SIProgramInfo.h"
     21 #include "Utils/AMDGPUBaseInfo.h"
     22 #include "llvm/ADT/StringSwitch.h"
     23 #include "llvm/IR/Constants.h"
     24 #include "llvm/IR/Module.h"
     25 #include "llvm/Support/raw_ostream.h"
     26 
     27 namespace llvm {
     28 
     29 static cl::opt<bool> DumpHSAMetadata(
     30     "amdgpu-dump-hsa-metadata",
     31     cl::desc("Dump AMDGPU HSA Metadata"));
     32 static cl::opt<bool> VerifyHSAMetadata(
     33     "amdgpu-verify-hsa-metadata",
     34     cl::desc("Verify AMDGPU HSA Metadata"));
     35 
     36 namespace AMDGPU {
     37 namespace HSAMD {
     38 
     39 void MetadataStreamer::dump(StringRef HSAMetadataString) const {
     40   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
     41 }
     42 
     43 void MetadataStreamer::verify(StringRef HSAMetadataString) const {
     44   errs() << "AMDGPU HSA Metadata Parser Test: ";
     45 
     46   HSAMD::Metadata FromHSAMetadataString;
     47   if (fromString(HSAMetadataString, FromHSAMetadataString)) {
     48     errs() << "FAIL\n";
     49     return;
     50   }
     51 
     52   std::string ToHSAMetadataString;
     53   if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
     54     errs() << "FAIL\n";
     55     return;
     56   }
     57 
     58   errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
     59          << '\n';
     60   if (HSAMetadataString != ToHSAMetadataString) {
     61     errs() << "Original input: " << HSAMetadataString << '\n'
     62            << "Produced output: " << ToHSAMetadataString << '\n';
     63   }
     64 }
     65 
     66 AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
     67   if (AccQual.empty())
     68     return AccessQualifier::Unknown;
     69 
     70   return StringSwitch<AccessQualifier>(AccQual)
     71              .Case("read_only",  AccessQualifier::ReadOnly)
     72              .Case("write_only", AccessQualifier::WriteOnly)
     73              .Case("read_write", AccessQualifier::ReadWrite)
     74              .Default(AccessQualifier::Default);
     75 }
     76 
     77 AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer(
     78     unsigned AddressSpace) const {
     79   if (AddressSpace == AMDGPUASI.PRIVATE_ADDRESS)
     80     return AddressSpaceQualifier::Private;
     81   if (AddressSpace == AMDGPUASI.GLOBAL_ADDRESS)
     82     return AddressSpaceQualifier::Global;
     83   if (AddressSpace == AMDGPUASI.CONSTANT_ADDRESS)
     84     return AddressSpaceQualifier::Constant;
     85   if (AddressSpace == AMDGPUASI.LOCAL_ADDRESS)
     86     return AddressSpaceQualifier::Local;
     87   if (AddressSpace == AMDGPUASI.FLAT_ADDRESS)
     88     return AddressSpaceQualifier::Generic;
     89   if (AddressSpace == AMDGPUASI.REGION_ADDRESS)
     90     return AddressSpaceQualifier::Region;
     91 
     92   llvm_unreachable("Unknown address space qualifier");
     93 }
     94 
     95 ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
     96                                          StringRef BaseTypeName) const {
     97   if (TypeQual.find("pipe") != StringRef::npos)
     98     return ValueKind::Pipe;
     99 
    100   return StringSwitch<ValueKind>(BaseTypeName)
    101              .Case("image1d_t", ValueKind::Image)
    102              .Case("image1d_array_t", ValueKind::Image)
    103              .Case("image1d_buffer_t", ValueKind::Image)
    104              .Case("image2d_t", ValueKind::Image)
    105              .Case("image2d_array_t", ValueKind::Image)
    106              .Case("image2d_array_depth_t", ValueKind::Image)
    107              .Case("image2d_array_msaa_t", ValueKind::Image)
    108              .Case("image2d_array_msaa_depth_t", ValueKind::Image)
    109              .Case("image2d_depth_t", ValueKind::Image)
    110              .Case("image2d_msaa_t", ValueKind::Image)
    111              .Case("image2d_msaa_depth_t", ValueKind::Image)
    112              .Case("image3d_t", ValueKind::Image)
    113              .Case("sampler_t", ValueKind::Sampler)
    114              .Case("queue_t", ValueKind::Queue)
    115              .Default(isa<PointerType>(Ty) ?
    116                           (Ty->getPointerAddressSpace() ==
    117                            AMDGPUASI.LOCAL_ADDRESS ?
    118                            ValueKind::DynamicSharedPointer :
    119                            ValueKind::GlobalBuffer) :
    120                       ValueKind::ByValue);
    121 }
    122 
    123 ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const {
    124   switch (Ty->getTypeID()) {
    125   case Type::IntegerTyID: {
    126     auto Signed = !TypeName.startswith("u");
    127     switch (Ty->getIntegerBitWidth()) {
    128     case 8:
    129       return Signed ? ValueType::I8 : ValueType::U8;
    130     case 16:
    131       return Signed ? ValueType::I16 : ValueType::U16;
    132     case 32:
    133       return Signed ? ValueType::I32 : ValueType::U32;
    134     case 64:
    135       return Signed ? ValueType::I64 : ValueType::U64;
    136     default:
    137       return ValueType::Struct;
    138     }
    139   }
    140   case Type::HalfTyID:
    141     return ValueType::F16;
    142   case Type::FloatTyID:
    143     return ValueType::F32;
    144   case Type::DoubleTyID:
    145     return ValueType::F64;
    146   case Type::PointerTyID:
    147     return getValueType(Ty->getPointerElementType(), TypeName);
    148   case Type::VectorTyID:
    149     return getValueType(Ty->getVectorElementType(), TypeName);
    150   default:
    151     return ValueType::Struct;
    152   }
    153 }
    154 
    155 std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const {
    156   switch (Ty->getTypeID()) {
    157   case Type::IntegerTyID: {
    158     if (!Signed)
    159       return (Twine('u') + getTypeName(Ty, true)).str();
    160 
    161     auto BitWidth = Ty->getIntegerBitWidth();
    162     switch (BitWidth) {
    163     case 8:
    164       return "char";
    165     case 16:
    166       return "short";
    167     case 32:
    168       return "int";
    169     case 64:
    170       return "long";
    171     default:
    172       return (Twine('i') + Twine(BitWidth)).str();
    173     }
    174   }
    175   case Type::HalfTyID:
    176     return "half";
    177   case Type::FloatTyID:
    178     return "float";
    179   case Type::DoubleTyID:
    180     return "double";
    181   case Type::VectorTyID: {
    182     auto VecTy = cast<VectorType>(Ty);
    183     auto ElTy = VecTy->getElementType();
    184     auto NumElements = VecTy->getVectorNumElements();
    185     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
    186   }
    187   default:
    188     return "unknown";
    189   }
    190 }
    191 
    192 std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
    193     MDNode *Node) const {
    194   std::vector<uint32_t> Dims;
    195   if (Node->getNumOperands() != 3)
    196     return Dims;
    197 
    198   for (auto &Op : Node->operands())
    199     Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
    200   return Dims;
    201 }
    202 
    203 Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
    204     const MachineFunction &MF,
    205     const SIProgramInfo &ProgramInfo) const {
    206   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
    207   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
    208   HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
    209   const Function &F = MF.getFunction();
    210 
    211   assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
    212          F.getCallingConv() == CallingConv::SPIR_KERNEL);
    213 
    214   unsigned MaxKernArgAlign;
    215   HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
    216                                                                MaxKernArgAlign);
    217   HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
    218   HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
    219   HSACodeProps.mKernargSegmentAlign = std::max(MaxKernArgAlign, 4u);
    220   HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
    221   HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
    222   HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
    223   HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
    224   HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
    225   HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
    226   HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
    227   HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
    228 
    229   return HSACodeProps;
    230 }
    231 
    232 Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps(
    233     const MachineFunction &MF,
    234     const SIProgramInfo &ProgramInfo) const {
    235   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
    236   HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
    237 
    238   if (!STM.debuggerSupported())
    239     return HSADebugProps;
    240 
    241   HSADebugProps.mDebuggerABIVersion.push_back(1);
    242   HSADebugProps.mDebuggerABIVersion.push_back(0);
    243 
    244   if (STM.debuggerEmitPrologue()) {
    245     HSADebugProps.mPrivateSegmentBufferSGPR =
    246         ProgramInfo.DebuggerPrivateSegmentBufferSGPR;
    247     HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR =
    248         ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
    249   }
    250 
    251   return HSADebugProps;
    252 }
    253 
    254 void MetadataStreamer::emitVersion() {
    255   auto &Version = HSAMetadata.mVersion;
    256 
    257   Version.push_back(VersionMajor);
    258   Version.push_back(VersionMinor);
    259 }
    260 
    261 void MetadataStreamer::emitPrintf(const Module &Mod) {
    262   auto &Printf = HSAMetadata.mPrintf;
    263 
    264   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
    265   if (!Node)
    266     return;
    267 
    268   for (auto Op : Node->operands())
    269     if (Op->getNumOperands())
    270       Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
    271 }
    272 
    273 void MetadataStreamer::emitKernelLanguage(const Function &Func) {
    274   auto &Kernel = HSAMetadata.mKernels.back();
    275 
    276   // TODO: What about other languages?
    277   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
    278   if (!Node || !Node->getNumOperands())
    279     return;
    280   auto Op0 = Node->getOperand(0);
    281   if (Op0->getNumOperands() <= 1)
    282     return;
    283 
    284   Kernel.mLanguage = "OpenCL C";
    285   Kernel.mLanguageVersion.push_back(
    286       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
    287   Kernel.mLanguageVersion.push_back(
    288       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
    289 }
    290 
    291 void MetadataStreamer::emitKernelAttrs(const Function &Func) {
    292   auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
    293 
    294   if (auto Node = Func.getMetadata("reqd_work_group_size"))
    295     Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
    296   if (auto Node = Func.getMetadata("work_group_size_hint"))
    297     Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
    298   if (auto Node = Func.getMetadata("vec_type_hint")) {
    299     Attrs.mVecTypeHint = getTypeName(
    300         cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
    301         mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
    302   }
    303   if (Func.hasFnAttribute("runtime-handle")) {
    304     Attrs.mRuntimeHandle =
    305         Func.getFnAttribute("runtime-handle").getValueAsString().str();
    306   }
    307 }
    308 
    309 void MetadataStreamer::emitKernelArgs(const Function &Func) {
    310   for (auto &Arg : Func.args())
    311     emitKernelArg(Arg);
    312 
    313   emitHiddenKernelArgs(Func);
    314 }
    315 
    316 void MetadataStreamer::emitKernelArg(const Argument &Arg) {
    317   auto Func = Arg.getParent();
    318   auto ArgNo = Arg.getArgNo();
    319   const MDNode *Node;
    320 
    321   StringRef Name;
    322   Node = Func->getMetadata("kernel_arg_name");
    323   if (Node && ArgNo < Node->getNumOperands())
    324     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
    325   else if (Arg.hasName())
    326     Name = Arg.getName();
    327 
    328   StringRef TypeName;
    329   Node = Func->getMetadata("kernel_arg_type");
    330   if (Node && ArgNo < Node->getNumOperands())
    331     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
    332 
    333   StringRef BaseTypeName;
    334   Node = Func->getMetadata("kernel_arg_base_type");
    335   if (Node && ArgNo < Node->getNumOperands())
    336     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
    337 
    338   StringRef AccQual;
    339   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
    340       Arg.hasNoAliasAttr()) {
    341     AccQual = "read_only";
    342   } else {
    343     Node = Func->getMetadata("kernel_arg_access_qual");
    344     if (Node && ArgNo < Node->getNumOperands())
    345       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
    346   }
    347 
    348   StringRef TypeQual;
    349   Node = Func->getMetadata("kernel_arg_type_qual");
    350   if (Node && ArgNo < Node->getNumOperands())
    351     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
    352 
    353   Type *Ty = Arg.getType();
    354   const DataLayout &DL = Func->getParent()->getDataLayout();
    355 
    356   unsigned PointeeAlign = 0;
    357   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
    358     if (PtrTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) {
    359       PointeeAlign = Arg.getParamAlignment();
    360       if (PointeeAlign == 0)
    361         PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
    362     }
    363   }
    364 
    365   emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName),
    366                 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
    367 }
    368 
    369 void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
    370                                      ValueKind ValueKind,
    371                                      unsigned PointeeAlign,
    372                                      StringRef Name,
    373                                      StringRef TypeName, StringRef BaseTypeName,
    374                                      StringRef AccQual, StringRef TypeQual) {
    375   HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
    376   auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
    377 
    378   Arg.mName = Name;
    379   Arg.mTypeName = TypeName;
    380   Arg.mSize = DL.getTypeAllocSize(Ty);
    381   Arg.mAlign = DL.getABITypeAlignment(Ty);
    382   Arg.mValueKind = ValueKind;
    383   Arg.mValueType = getValueType(Ty, BaseTypeName);
    384   Arg.mPointeeAlign = PointeeAlign;
    385 
    386   if (auto PtrTy = dyn_cast<PointerType>(Ty))
    387     Arg.mAddrSpaceQual = getAddressSpaceQualifer(PtrTy->getAddressSpace());
    388 
    389   Arg.mAccQual = getAccessQualifier(AccQual);
    390 
    391   // TODO: Emit Arg.mActualAccQual.
    392 
    393   SmallVector<StringRef, 1> SplitTypeQuals;
    394   TypeQual.split(SplitTypeQuals, " ", -1, false);
    395   for (StringRef Key : SplitTypeQuals) {
    396     auto P = StringSwitch<bool*>(Key)
    397                  .Case("const",    &Arg.mIsConst)
    398                  .Case("restrict", &Arg.mIsRestrict)
    399                  .Case("volatile", &Arg.mIsVolatile)
    400                  .Case("pipe",     &Arg.mIsPipe)
    401                  .Default(nullptr);
    402     if (P)
    403       *P = true;
    404   }
    405 }
    406 
    407 void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
    408   int HiddenArgNumBytes =
    409       getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
    410 
    411   if (!HiddenArgNumBytes)
    412     return;
    413 
    414   auto &DL = Func.getParent()->getDataLayout();
    415   auto Int64Ty = Type::getInt64Ty(Func.getContext());
    416 
    417   if (HiddenArgNumBytes >= 8)
    418     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
    419   if (HiddenArgNumBytes >= 16)
    420     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
    421   if (HiddenArgNumBytes >= 24)
    422     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
    423 
    424   auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
    425                                       AMDGPUASI.GLOBAL_ADDRESS);
    426 
    427   // Emit "printf buffer" argument if printf is used, otherwise emit dummy
    428   // "none" argument.
    429   if (HiddenArgNumBytes >= 32) {
    430     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
    431       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
    432     else
    433       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
    434   }
    435 
    436   // Emit "default queue" and "completion action" arguments if enqueue kernel is
    437   // used, otherwise emit dummy "none" arguments.
    438   if (HiddenArgNumBytes >= 48) {
    439     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
    440       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
    441       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
    442     } else {
    443       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
    444       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
    445     }
    446   }
    447 }
    448 
    449 void MetadataStreamer::begin(const Module &Mod) {
    450   AMDGPUASI = getAMDGPUAS(Mod);
    451   emitVersion();
    452   emitPrintf(Mod);
    453 }
    454 
    455 void MetadataStreamer::end() {
    456   std::string HSAMetadataString;
    457   if (toString(HSAMetadata, HSAMetadataString))
    458     return;
    459 
    460   if (DumpHSAMetadata)
    461     dump(HSAMetadataString);
    462   if (VerifyHSAMetadata)
    463     verify(HSAMetadataString);
    464 }
    465 
    466 void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) {
    467   auto &Func = MF.getFunction();
    468   if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
    469     return;
    470 
    471   auto CodeProps = getHSACodeProps(MF, ProgramInfo);
    472   auto DebugProps = getHSADebugProps(MF, ProgramInfo);
    473 
    474   HSAMetadata.mKernels.push_back(Kernel::Metadata());
    475   auto &Kernel = HSAMetadata.mKernels.back();
    476 
    477   Kernel.mName = Func.getName();
    478   Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
    479   emitKernelLanguage(Func);
    480   emitKernelAttrs(Func);
    481   emitKernelArgs(Func);
    482   HSAMetadata.mKernels.back().mCodeProps = CodeProps;
    483   HSAMetadata.mKernels.back().mDebugProps = DebugProps;
    484 }
    485 
    486 } // end namespace HSAMD
    487 } // end namespace AMDGPU
    488 } // end namespace llvm
    489