1 //===----- CGCUDANV.cpp - Interface to NVIDIA CUDA Runtime ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for CUDA code generation targeting the NVIDIA CUDA 11 // runtime library. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "CGCUDARuntime.h" 16 #include "CodeGenFunction.h" 17 #include "CodeGenModule.h" 18 #include "clang/AST/Decl.h" 19 #include "llvm/BasicBlock.h" 20 #include "llvm/Constants.h" 21 #include "llvm/DerivedTypes.h" 22 #include "llvm/Support/CallSite.h" 23 24 #include <vector> 25 26 using namespace clang; 27 using namespace CodeGen; 28 29 namespace { 30 31 class CGNVCUDARuntime : public CGCUDARuntime { 32 33 private: 34 llvm::Type *IntTy, *SizeTy; 35 llvm::PointerType *CharPtrTy, *VoidPtrTy; 36 37 llvm::Constant *getSetupArgumentFn() const; 38 llvm::Constant *getLaunchFn() const; 39 40 public: 41 CGNVCUDARuntime(CodeGenModule &CGM); 42 43 void EmitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args); 44 }; 45 46 } 47 48 CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : CGCUDARuntime(CGM) { 49 CodeGen::CodeGenTypes &Types = CGM.getTypes(); 50 ASTContext &Ctx = CGM.getContext(); 51 52 IntTy = Types.ConvertType(Ctx.IntTy); 53 SizeTy = Types.ConvertType(Ctx.getSizeType()); 54 55 CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy)); 56 VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy)); 57 } 58 59 llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const { 60 // cudaError_t cudaSetupArgument(void *, size_t, size_t) 61 std::vector<llvm::Type*> Params; 62 Params.push_back(VoidPtrTy); 63 Params.push_back(SizeTy); 64 Params.push_back(SizeTy); 65 return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy, 66 Params, false), 67 "cudaSetupArgument"); 68 } 69 70 llvm::Constant *CGNVCUDARuntime::getLaunchFn() const { 71 // cudaError_t cudaLaunch(char *) 72 std::vector<llvm::Type*> Params; 73 Params.push_back(CharPtrTy); 74 return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy, 75 Params, false), 76 "cudaLaunch"); 77 } 78 79 void CGNVCUDARuntime::EmitDeviceStubBody(CodeGenFunction &CGF, 80 FunctionArgList &Args) { 81 // Build the argument value list and the argument stack struct type. 82 llvm::SmallVector<llvm::Value *, 16> ArgValues; 83 std::vector<llvm::Type *> ArgTypes; 84 for (FunctionArgList::const_iterator I = Args.begin(), E = Args.end(); 85 I != E; ++I) { 86 llvm::Value *V = CGF.GetAddrOfLocalVar(*I); 87 ArgValues.push_back(V); 88 assert(isa<llvm::PointerType>(V->getType()) && "Arg type not PointerType"); 89 ArgTypes.push_back(cast<llvm::PointerType>(V->getType())->getElementType()); 90 } 91 llvm::StructType *ArgStackTy = llvm::StructType::get( 92 CGF.getLLVMContext(), ArgTypes); 93 94 llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); 95 96 // Emit the calls to cudaSetupArgument 97 llvm::Constant *cudaSetupArgFn = getSetupArgumentFn(); 98 for (unsigned I = 0, E = Args.size(); I != E; ++I) { 99 llvm::Value *Args[3]; 100 llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next"); 101 Args[0] = CGF.Builder.CreatePointerCast(ArgValues[I], VoidPtrTy); 102 Args[1] = CGF.Builder.CreateIntCast( 103 llvm::ConstantExpr::getSizeOf(ArgTypes[I]), 104 SizeTy, false); 105 Args[2] = CGF.Builder.CreateIntCast( 106 llvm::ConstantExpr::getOffsetOf(ArgStackTy, I), 107 SizeTy, false); 108 llvm::CallSite CS = CGF.EmitCallOrInvoke(cudaSetupArgFn, Args); 109 llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0); 110 llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero); 111 CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock); 112 CGF.EmitBlock(NextBlock); 113 } 114 115 // Emit the call to cudaLaunch 116 llvm::Constant *cudaLaunchFn = getLaunchFn(); 117 llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy); 118 CGF.EmitCallOrInvoke(cudaLaunchFn, Arg); 119 CGF.EmitBranch(EndBlock); 120 121 CGF.EmitBlock(EndBlock); 122 } 123 124 CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) { 125 return new CGNVCUDARuntime(CGM); 126 } 127