Home | History | Annotate | Download | only in CodeGen
      1 //===----- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ----===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This provides a class for OpenMP runtime code generation specialized to NVPTX
     11 // targets.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
     16 #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
     17 
     18 #include "CGOpenMPRuntime.h"
     19 #include "CodeGenFunction.h"
     20 #include "clang/AST/StmtOpenMP.h"
     21 #include "llvm/IR/CallSite.h"
     22 
     23 namespace clang {
     24 namespace CodeGen {
     25 
     26 class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
     27 public:
     28   class EntryFunctionState {
     29   public:
     30     llvm::BasicBlock *ExitBB;
     31 
     32     EntryFunctionState() : ExitBB(nullptr){};
     33   };
     34 
     35   class WorkerFunctionState {
     36   public:
     37     llvm::Function *WorkerFn;
     38     const CGFunctionInfo *CGFI;
     39 
     40     WorkerFunctionState(CodeGenModule &CGM);
     41 
     42   private:
     43     void createWorkerFunction(CodeGenModule &CGM);
     44   };
     45 
     46   /// \brief Helper for target entry function. Guide the master and worker
     47   /// threads to their respective locations.
     48   void emitEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
     49                        WorkerFunctionState &WST);
     50 
     51   /// \brief Signal termination of OMP execution.
     52   void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
     53 
     54 private:
     55   //
     56   // NVPTX calls.
     57   //
     58 
     59   /// \brief Get the GPU warp size.
     60   llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF);
     61 
     62   /// \brief Get the id of the current thread on the GPU.
     63   llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF);
     64 
     65   // \brief Get the maximum number of threads in a block of the GPU.
     66   llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF);
     67 
     68   /// \brief Get barrier to synchronize all threads in a block.
     69   void getNVPTXCTABarrier(CodeGenFunction &CGF);
     70 
     71   // \brief Synchronize all GPU threads in a block.
     72   void syncCTAThreads(CodeGenFunction &CGF);
     73 
     74   //
     75   // OMP calls.
     76   //
     77 
     78   /// \brief Get the thread id of the OMP master thread.
     79   /// The master thread id is the first thread (lane) of the last warp in the
     80   /// GPU block.  Warp size is assumed to be some power of 2.
     81   /// Thread id is 0 indexed.
     82   /// E.g: If NumThreads is 33, master id is 32.
     83   ///      If NumThreads is 64, master id is 32.
     84   ///      If NumThreads is 1024, master id is 992.
     85   llvm::Value *getMasterThreadID(CodeGenFunction &CGF);
     86 
     87   //
     88   // Private state and methods.
     89   //
     90 
     91   // Master-worker control state.
     92   // Number of requested OMP threads in parallel region.
     93   llvm::GlobalVariable *ActiveWorkers;
     94   // Outlined function for the workers to execute.
     95   llvm::GlobalVariable *WorkID;
     96 
     97   /// \brief Initialize master-worker control state.
     98   void initializeEnvironment();
     99 
    100   /// \brief Emit the worker function for the current target region.
    101   void emitWorkerFunction(WorkerFunctionState &WST);
    102 
    103   /// \brief Helper for worker function. Emit body of worker loop.
    104   void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST);
    105 
    106   /// \brief Returns specified OpenMP runtime function for the current OpenMP
    107   /// implementation.  Specialized for the NVPTX device.
    108   /// \param Function OpenMP runtime function.
    109   /// \return Specified function.
    110   llvm::Constant *createNVPTXRuntimeFunction(unsigned Function);
    111 
    112   //
    113   // Base class overrides.
    114   //
    115 
    116   /// \brief Creates offloading entry for the provided entry ID \a ID,
    117   /// address \a Addr and size \a Size.
    118   void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
    119                           uint64_t Size) override;
    120 
    121   /// \brief Emit outlined function for 'target' directive on the NVPTX
    122   /// device.
    123   /// \param D Directive to emit.
    124   /// \param ParentName Name of the function that encloses the target region.
    125   /// \param OutlinedFn Outlined function value to be defined by this call.
    126   /// \param OutlinedFnID Outlined function ID value to be defined by this call.
    127   /// \param IsOffloadEntry True if the outlined function is an offload entry.
    128   /// An outlined function may not be an entry if, e.g. the if clause always
    129   /// evaluates to false.
    130   void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
    131                                   StringRef ParentName,
    132                                   llvm::Function *&OutlinedFn,
    133                                   llvm::Constant *&OutlinedFnID,
    134                                   bool IsOffloadEntry,
    135                                   const RegionCodeGenTy &CodeGen) override;
    136 
    137 public:
    138   explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
    139 
    140   /// \brief This function ought to emit, in the general case, a call to
    141   // the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed
    142   // as these numbers are obtained through the PTX grid and block configuration.
    143   /// \param NumTeams An integer expression of teams.
    144   /// \param ThreadLimit An integer expression of threads.
    145   void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
    146                           const Expr *ThreadLimit, SourceLocation Loc) override;
    147 
    148   /// \brief Emits inlined function for the specified OpenMP parallel
    149   //  directive but an inlined function for teams.
    150   /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
    151   /// kmp_int32 BoundID, struct context_vars*).
    152   /// \param D OpenMP directive.
    153   /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
    154   /// \param InnermostKind Kind of innermost directive (for simple directives it
    155   /// is a directive itself, for combined - its innermost directive).
    156   /// \param CodeGen Code generation sequence for the \a D directive.
    157   llvm::Value *
    158   emitParallelOrTeamsOutlinedFunction(const OMPExecutableDirective &D,
    159                                       const VarDecl *ThreadIDVar,
    160                                       OpenMPDirectiveKind InnermostKind,
    161                                       const RegionCodeGenTy &CodeGen) override;
    162 
    163   /// \brief Emits code for teams call of the \a OutlinedFn with
    164   /// variables captured in a record which address is stored in \a
    165   /// CapturedStruct.
    166   /// \param OutlinedFn Outlined function to be run by team masters. Type of
    167   /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
    168   /// \param CapturedVars A pointer to the record with the references to
    169   /// variables used in \a OutlinedFn function.
    170   ///
    171   void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
    172                      SourceLocation Loc, llvm::Value *OutlinedFn,
    173                      ArrayRef<llvm::Value *> CapturedVars) override;
    174 };
    175 
    176 } // CodeGen namespace.
    177 } // clang namespace.
    178 
    179 #endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
    180