Home | History | Annotate | Download | only in CodeGen
      1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This provides a class for OpenMP runtime code generation.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "CGOpenMPRuntime.h"
     15 #include "CodeGenFunction.h"
     16 #include "CGCleanup.h"
     17 #include "clang/AST/Decl.h"
     18 #include "clang/AST/StmtOpenMP.h"
     19 #include "llvm/ADT/ArrayRef.h"
     20 #include "llvm/IR/CallSite.h"
     21 #include "llvm/IR/DerivedTypes.h"
     22 #include "llvm/IR/GlobalValue.h"
     23 #include "llvm/IR/Value.h"
     24 #include "llvm/Support/raw_ostream.h"
     25 #include <cassert>
     26 
     27 using namespace clang;
     28 using namespace CodeGen;
     29 
     30 namespace {
     31 /// \brief Base class for handling code generation inside OpenMP regions.
     32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
     33 public:
     34   /// \brief Kinds of OpenMP regions used in codegen.
     35   enum CGOpenMPRegionKind {
     36     /// \brief Region with outlined function for standalone 'parallel'
     37     /// directive.
     38     ParallelOutlinedRegion,
     39     /// \brief Region with outlined function for standalone 'task' directive.
     40     TaskOutlinedRegion,
     41     /// \brief Region for constructs that do not require function outlining,
     42     /// like 'for', 'sections', 'atomic' etc. directives.
     43     InlinedRegion,
     44   };
     45 
     46   CGOpenMPRegionInfo(const CapturedStmt &CS,
     47                      const CGOpenMPRegionKind RegionKind,
     48                      const RegionCodeGenTy &CodeGen)
     49       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
     50         CodeGen(CodeGen) {}
     51 
     52   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
     53                      const RegionCodeGenTy &CodeGen)
     54       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind),
     55         CodeGen(CodeGen) {}
     56 
     57   /// \brief Get a variable or parameter for storing global thread id
     58   /// inside OpenMP construct.
     59   virtual const VarDecl *getThreadIDVariable() const = 0;
     60 
     61   /// \brief Emit the captured statement body.
     62   virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
     63 
     64   /// \brief Get an LValue for the current ThreadID variable.
     65   /// \return LValue for thread id variable. This LValue always has type int32*.
     66   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
     67 
     68   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
     69 
     70   static bool classof(const CGCapturedStmtInfo *Info) {
     71     return Info->getKind() == CR_OpenMP;
     72   }
     73 
     74 protected:
     75   CGOpenMPRegionKind RegionKind;
     76   const RegionCodeGenTy &CodeGen;
     77 };
     78 
     79 /// \brief API for captured statement code generation in OpenMP constructs.
     80 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
     81 public:
     82   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
     83                              const RegionCodeGenTy &CodeGen)
     84       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen),
     85         ThreadIDVar(ThreadIDVar) {
     86     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
     87   }
     88   /// \brief Get a variable or parameter for storing global thread id
     89   /// inside OpenMP construct.
     90   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
     91 
     92   /// \brief Get the name of the capture helper.
     93   StringRef getHelperName() const override { return ".omp_outlined."; }
     94 
     95   static bool classof(const CGCapturedStmtInfo *Info) {
     96     return CGOpenMPRegionInfo::classof(Info) &&
     97            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
     98                ParallelOutlinedRegion;
     99   }
    100 
    101 private:
    102   /// \brief A variable or parameter storing global thread id for OpenMP
    103   /// constructs.
    104   const VarDecl *ThreadIDVar;
    105 };
    106 
    107 /// \brief API for captured statement code generation in OpenMP constructs.
    108 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
    109 public:
    110   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
    111                                  const VarDecl *ThreadIDVar,
    112                                  const RegionCodeGenTy &CodeGen)
    113       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen),
    114         ThreadIDVar(ThreadIDVar) {
    115     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
    116   }
    117   /// \brief Get a variable or parameter for storing global thread id
    118   /// inside OpenMP construct.
    119   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
    120 
    121   /// \brief Get an LValue for the current ThreadID variable.
    122   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
    123 
    124   /// \brief Get the name of the capture helper.
    125   StringRef getHelperName() const override { return ".omp_outlined."; }
    126 
    127   static bool classof(const CGCapturedStmtInfo *Info) {
    128     return CGOpenMPRegionInfo::classof(Info) &&
    129            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
    130                TaskOutlinedRegion;
    131   }
    132 
    133 private:
    134   /// \brief A variable or parameter storing global thread id for OpenMP
    135   /// constructs.
    136   const VarDecl *ThreadIDVar;
    137 };
    138 
    139 /// \brief API for inlined captured statement code generation in OpenMP
    140 /// constructs.
    141 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
    142 public:
    143   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
    144                             const RegionCodeGenTy &CodeGen)
    145       : CGOpenMPRegionInfo(InlinedRegion, CodeGen), OldCSI(OldCSI),
    146         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
    147   // \brief Retrieve the value of the context parameter.
    148   llvm::Value *getContextValue() const override {
    149     if (OuterRegionInfo)
    150       return OuterRegionInfo->getContextValue();
    151     llvm_unreachable("No context value for inlined OpenMP region");
    152   }
    153   virtual void setContextValue(llvm::Value *V) override {
    154     if (OuterRegionInfo) {
    155       OuterRegionInfo->setContextValue(V);
    156       return;
    157     }
    158     llvm_unreachable("No context value for inlined OpenMP region");
    159   }
    160   /// \brief Lookup the captured field decl for a variable.
    161   const FieldDecl *lookup(const VarDecl *VD) const override {
    162     if (OuterRegionInfo)
    163       return OuterRegionInfo->lookup(VD);
    164     // If there is no outer outlined region,no need to lookup in a list of
    165     // captured variables, we can use the original one.
    166     return nullptr;
    167   }
    168   FieldDecl *getThisFieldDecl() const override {
    169     if (OuterRegionInfo)
    170       return OuterRegionInfo->getThisFieldDecl();
    171     return nullptr;
    172   }
    173   /// \brief Get a variable or parameter for storing global thread id
    174   /// inside OpenMP construct.
    175   const VarDecl *getThreadIDVariable() const override {
    176     if (OuterRegionInfo)
    177       return OuterRegionInfo->getThreadIDVariable();
    178     return nullptr;
    179   }
    180 
    181   /// \brief Get the name of the capture helper.
    182   StringRef getHelperName() const override {
    183     if (auto *OuterRegionInfo = getOldCSI())
    184       return OuterRegionInfo->getHelperName();
    185     llvm_unreachable("No helper name for inlined OpenMP construct");
    186   }
    187 
    188   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
    189 
    190   static bool classof(const CGCapturedStmtInfo *Info) {
    191     return CGOpenMPRegionInfo::classof(Info) &&
    192            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
    193   }
    194 
    195 private:
    196   /// \brief CodeGen info about outer OpenMP region.
    197   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
    198   CGOpenMPRegionInfo *OuterRegionInfo;
    199 };
    200 
    201 /// \brief RAII for emitting code of OpenMP constructs.
    202 class InlinedOpenMPRegionRAII {
    203   CodeGenFunction &CGF;
    204 
    205 public:
    206   /// \brief Constructs region for combined constructs.
    207   /// \param CodeGen Code generation sequence for combined directives. Includes
    208   /// a list of functions used for code generation of implicitly inlined
    209   /// regions.
    210   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen)
    211       : CGF(CGF) {
    212     // Start emission for the construct.
    213     CGF.CapturedStmtInfo =
    214         new CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, CodeGen);
    215   }
    216   ~InlinedOpenMPRegionRAII() {
    217     // Restore original CapturedStmtInfo only if we're done with code emission.
    218     auto *OldCSI =
    219         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    220     delete CGF.CapturedStmtInfo;
    221     CGF.CapturedStmtInfo = OldCSI;
    222   }
    223 };
    224 
    225 } // namespace
    226 
    227 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
    228   return CGF.MakeNaturalAlignAddrLValue(
    229       CGF.Builder.CreateAlignedLoad(
    230           CGF.GetAddrOfLocalVar(getThreadIDVariable()),
    231           CGF.PointerAlignInBytes),
    232       getThreadIDVariable()
    233           ->getType()
    234           ->castAs<PointerType>()
    235           ->getPointeeType());
    236 }
    237 
    238 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
    239   // 1.2.2 OpenMP Language Terminology
    240   // Structured block - An executable statement with a single entry at the
    241   // top and a single exit at the bottom.
    242   // The point of exit cannot be a branch out of the structured block.
    243   // longjmp() and throw() must not violate the entry/exit criteria.
    244   CGF.EHStack.pushTerminate();
    245   {
    246     CodeGenFunction::RunCleanupsScope Scope(CGF);
    247     CodeGen(CGF);
    248   }
    249   CGF.EHStack.popTerminate();
    250 }
    251 
    252 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    253     CodeGenFunction &CGF) {
    254   return CGF.MakeNaturalAlignAddrLValue(
    255       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
    256       getThreadIDVariable()->getType());
    257 }
    258 
    259 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    260     : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
    261   IdentTy = llvm::StructType::create(
    262       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
    263       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
    264       CGM.Int8PtrTy /* psource */, nullptr);
    265   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    266   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
    267                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
    268   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
    269   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
    270 }
    271 
    272 void CGOpenMPRuntime::clear() {
    273   InternalVars.clear();
    274 }
    275 
    276 llvm::Value *
    277 CGOpenMPRuntime::emitParallelOutlinedFunction(const OMPExecutableDirective &D,
    278                                               const VarDecl *ThreadIDVar,
    279                                               const RegionCodeGenTy &CodeGen) {
    280   assert(ThreadIDVar->getType()->isPointerType() &&
    281          "thread id variable must be of type kmp_int32 *");
    282   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
    283   CodeGenFunction CGF(CGM, true);
    284   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen);
    285   CGF.CapturedStmtInfo = &CGInfo;
    286   return CGF.GenerateCapturedStmtFunction(*CS);
    287 }
    288 
    289 llvm::Value *
    290 CGOpenMPRuntime::emitTaskOutlinedFunction(const OMPExecutableDirective &D,
    291                                           const VarDecl *ThreadIDVar,
    292                                           const RegionCodeGenTy &CodeGen) {
    293   assert(!ThreadIDVar->getType()->isPointerType() &&
    294          "thread id variable must be of type kmp_int32 for tasks");
    295   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
    296   CodeGenFunction CGF(CGM, true);
    297   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen);
    298   CGF.CapturedStmtInfo = &CGInfo;
    299   return CGF.GenerateCapturedStmtFunction(*CS);
    300 }
    301 
    302 llvm::Value *
    303 CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
    304   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
    305   if (!Entry) {
    306     if (!DefaultOpenMPPSource) {
    307       // Initialize default location for psource field of ident_t structure of
    308       // all ident_t objects. Format is ";file;function;line;column;;".
    309       // Taken from
    310       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
    311       DefaultOpenMPPSource =
    312           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;");
    313       DefaultOpenMPPSource =
    314           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    315     }
    316     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
    317         CGM.getModule(), IdentTy, /*isConstant*/ true,
    318         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
    319     DefaultOpenMPLocation->setUnnamedAddr(true);
    320 
    321     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
    322     llvm::Constant *Values[] = {Zero,
    323                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
    324                                 Zero, Zero, DefaultOpenMPPSource};
    325     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
    326     DefaultOpenMPLocation->setInitializer(Init);
    327     OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation;
    328     return DefaultOpenMPLocation;
    329   }
    330   return Entry;
    331 }
    332 
    333 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
    334                                                  SourceLocation Loc,
    335                                                  OpenMPLocationFlags Flags) {
    336   // If no debug info is generated - return global default location.
    337   if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
    338       Loc.isInvalid())
    339     return getOrCreateDefaultLocation(Flags);
    340 
    341   assert(CGF.CurFn && "No function in current CodeGenFunction.");
    342 
    343   llvm::Value *LocValue = nullptr;
    344   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
    345   if (I != OpenMPLocThreadIDMap.end())
    346     LocValue = I->second.DebugLoc;
    347   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
    348   // GetOpenMPThreadID was called before this routine.
    349   if (LocValue == nullptr) {
    350     // Generate "ident_t .kmpc_loc.addr;"
    351     llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
    352     AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
    353     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    354     Elem.second.DebugLoc = AI;
    355     LocValue = AI;
    356 
    357     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    358     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
    359     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
    360                              llvm::ConstantExpr::getSizeOf(IdentTy),
    361                              CGM.PointerAlignInBytes);
    362   }
    363 
    364   // char **psource = &.kmpc_loc_<flags>.addr.psource;
    365   auto *PSource = CGF.Builder.CreateConstInBoundsGEP2_32(IdentTy, LocValue, 0,
    366                                                          IdentField_PSource);
    367 
    368   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
    369   if (OMPDebugLoc == nullptr) {
    370     SmallString<128> Buffer2;
    371     llvm::raw_svector_ostream OS2(Buffer2);
    372     // Build debug location
    373     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    374     OS2 << ";" << PLoc.getFilename() << ";";
    375     if (const FunctionDecl *FD =
    376             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
    377       OS2 << FD->getQualifiedNameAsString();
    378     }
    379     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    380     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    381     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
    382   }
    383   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
    384   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
    385 
    386   return LocValue;
    387 }
    388 
    389 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
    390                                           SourceLocation Loc) {
    391   assert(CGF.CurFn && "No function in current CodeGenFunction.");
    392 
    393   llvm::Value *ThreadID = nullptr;
    394   // Check whether we've already cached a load of the thread id in this
    395   // function.
    396   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
    397   if (I != OpenMPLocThreadIDMap.end()) {
    398     ThreadID = I->second.ThreadID;
    399     if (ThreadID != nullptr)
    400       return ThreadID;
    401   }
    402   if (auto OMPRegionInfo =
    403           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    404     if (OMPRegionInfo->getThreadIDVariable()) {
    405       // Check if this an outlined function with thread id passed as argument.
    406       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
    407       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
    408       // If value loaded in entry block, cache it and use it everywhere in
    409       // function.
    410       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    411         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    412         Elem.second.ThreadID = ThreadID;
    413       }
    414       return ThreadID;
    415     }
    416   }
    417 
    418   // This is not an outlined function region - need to call __kmpc_int32
    419   // kmpc_global_thread_num(ident_t *loc).
    420   // Generate thread id value and cache this value for use across the
    421   // function.
    422   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    423   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
    424   ThreadID =
    425       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
    426                           emitUpdateLocation(CGF, Loc));
    427   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    428   Elem.second.ThreadID = ThreadID;
    429   return ThreadID;
    430 }
    431 
    432 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
    433   assert(CGF.CurFn && "No function in current CodeGenFunction.");
    434   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
    435     OpenMPLocThreadIDMap.erase(CGF.CurFn);
    436 }
    437 
    438 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
    439   return llvm::PointerType::getUnqual(IdentTy);
    440 }
    441 
    442 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
    443   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
    444 }
    445 
    446 llvm::Constant *
    447 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
    448   llvm::Constant *RTLFn = nullptr;
    449   switch (Function) {
    450   case OMPRTL__kmpc_fork_call: {
    451     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
    452     // microtask, ...);
    453     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
    454                                 getKmpc_MicroPointerTy()};
    455     llvm::FunctionType *FnTy =
    456         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    457     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
    458     break;
    459   }
    460   case OMPRTL__kmpc_global_thread_num: {
    461     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
    462     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    463     llvm::FunctionType *FnTy =
    464         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    465     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
    466     break;
    467   }
    468   case OMPRTL__kmpc_threadprivate_cached: {
    469     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
    470     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
    471     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
    472                                 CGM.VoidPtrTy, CGM.SizeTy,
    473                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
    474     llvm::FunctionType *FnTy =
    475         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
    476     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
    477     break;
    478   }
    479   case OMPRTL__kmpc_critical: {
    480     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
    481     // kmp_critical_name *crit);
    482     llvm::Type *TypeParams[] = {
    483         getIdentTyPointerTy(), CGM.Int32Ty,
    484         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    485     llvm::FunctionType *FnTy =
    486         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    487     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
    488     break;
    489   }
    490   case OMPRTL__kmpc_threadprivate_register: {
    491     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
    492     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
    493     // typedef void *(*kmpc_ctor)(void *);
    494     auto KmpcCtorTy =
    495         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
    496                                 /*isVarArg*/ false)->getPointerTo();
    497     // typedef void *(*kmpc_cctor)(void *, void *);
    498     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    499     auto KmpcCopyCtorTy =
    500         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
    501                                 /*isVarArg*/ false)->getPointerTo();
    502     // typedef void (*kmpc_dtor)(void *);
    503     auto KmpcDtorTy =
    504         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
    505             ->getPointerTo();
    506     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
    507                               KmpcCopyCtorTy, KmpcDtorTy};
    508     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
    509                                         /*isVarArg*/ false);
    510     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
    511     break;
    512   }
    513   case OMPRTL__kmpc_end_critical: {
    514     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
    515     // kmp_critical_name *crit);
    516     llvm::Type *TypeParams[] = {
    517         getIdentTyPointerTy(), CGM.Int32Ty,
    518         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    519     llvm::FunctionType *FnTy =
    520         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    521     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
    522     break;
    523   }
    524   case OMPRTL__kmpc_cancel_barrier: {
    525     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
    526     // global_tid);
    527     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    528     llvm::FunctionType *FnTy =
    529         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    530     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
    531     break;
    532   }
    533   case OMPRTL__kmpc_for_static_fini: {
    534     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
    535     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    536     llvm::FunctionType *FnTy =
    537         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    538     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
    539     break;
    540   }
    541   case OMPRTL__kmpc_push_num_threads: {
    542     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
    543     // kmp_int32 num_threads)
    544     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
    545                                 CGM.Int32Ty};
    546     llvm::FunctionType *FnTy =
    547         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    548     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
    549     break;
    550   }
    551   case OMPRTL__kmpc_serialized_parallel: {
    552     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
    553     // global_tid);
    554     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    555     llvm::FunctionType *FnTy =
    556         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    557     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
    558     break;
    559   }
    560   case OMPRTL__kmpc_end_serialized_parallel: {
    561     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
    562     // global_tid);
    563     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    564     llvm::FunctionType *FnTy =
    565         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    566     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
    567     break;
    568   }
    569   case OMPRTL__kmpc_flush: {
    570     // Build void __kmpc_flush(ident_t *loc);
    571     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    572     llvm::FunctionType *FnTy =
    573         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    574     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
    575     break;
    576   }
    577   case OMPRTL__kmpc_master: {
    578     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
    579     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    580     llvm::FunctionType *FnTy =
    581         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    582     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
    583     break;
    584   }
    585   case OMPRTL__kmpc_end_master: {
    586     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
    587     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    588     llvm::FunctionType *FnTy =
    589         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    590     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
    591     break;
    592   }
    593   case OMPRTL__kmpc_omp_taskyield: {
    594     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
    595     // int end_part);
    596     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    597     llvm::FunctionType *FnTy =
    598         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    599     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
    600     break;
    601   }
    602   case OMPRTL__kmpc_single: {
    603     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
    604     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    605     llvm::FunctionType *FnTy =
    606         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    607     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
    608     break;
    609   }
    610   case OMPRTL__kmpc_end_single: {
    611     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
    612     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    613     llvm::FunctionType *FnTy =
    614         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    615     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
    616     break;
    617   }
    618   case OMPRTL__kmpc_omp_task_alloc: {
    619     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
    620     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
    621     // kmp_routine_entry_t *task_entry);
    622     assert(KmpRoutineEntryPtrTy != nullptr &&
    623            "Type kmp_routine_entry_t must be created.");
    624     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
    625                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
    626     // Return void * and then cast to particular kmp_task_t type.
    627     llvm::FunctionType *FnTy =
    628         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    629     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
    630     break;
    631   }
    632   case OMPRTL__kmpc_omp_task: {
    633     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
    634     // *new_task);
    635     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
    636                                 CGM.VoidPtrTy};
    637     llvm::FunctionType *FnTy =
    638         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    639     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
    640     break;
    641   }
    642   case OMPRTL__kmpc_copyprivate: {
    643     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
    644     // kmp_int32 cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
    645     // kmp_int32 didit);
    646     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    647     auto *CpyFnTy =
    648         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
    649     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
    650                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
    651                                 CGM.Int32Ty};
    652     llvm::FunctionType *FnTy =
    653         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    654     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
    655     break;
    656   }
    657   case OMPRTL__kmpc_reduce: {
    658     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
    659     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
    660     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
    661     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    662     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
    663                                                /*isVarArg=*/false);
    664     llvm::Type *TypeParams[] = {
    665         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
    666         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
    667         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    668     llvm::FunctionType *FnTy =
    669         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    670     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
    671     break;
    672   }
    673   case OMPRTL__kmpc_reduce_nowait: {
    674     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
    675     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
    676     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
    677     // *lck);
    678     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    679     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
    680                                                /*isVarArg=*/false);
    681     llvm::Type *TypeParams[] = {
    682         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
    683         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
    684         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    685     llvm::FunctionType *FnTy =
    686         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    687     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
    688     break;
    689   }
    690   case OMPRTL__kmpc_end_reduce: {
    691     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
    692     // kmp_critical_name *lck);
    693     llvm::Type *TypeParams[] = {
    694         getIdentTyPointerTy(), CGM.Int32Ty,
    695         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    696     llvm::FunctionType *FnTy =
    697         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    698     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
    699     break;
    700   }
    701   case OMPRTL__kmpc_end_reduce_nowait: {
    702     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    703     // kmp_critical_name *lck);
    704     llvm::Type *TypeParams[] = {
    705         getIdentTyPointerTy(), CGM.Int32Ty,
    706         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    707     llvm::FunctionType *FnTy =
    708         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    709     RTLFn =
    710         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
    711     break;
    712   }
    713   }
    714   return RTLFn;
    715 }
    716 
    717 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
    718                                                              bool IVSigned) {
    719   assert((IVSize == 32 || IVSize == 64) &&
    720          "IV size is not compatible with the omp runtime");
    721   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
    722                                        : "__kmpc_for_static_init_4u")
    723                            : (IVSigned ? "__kmpc_for_static_init_8"
    724                                        : "__kmpc_for_static_init_8u");
    725   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
    726   auto PtrTy = llvm::PointerType::getUnqual(ITy);
    727   llvm::Type *TypeParams[] = {
    728     getIdentTyPointerTy(),                     // loc
    729     CGM.Int32Ty,                               // tid
    730     CGM.Int32Ty,                               // schedtype
    731     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    732     PtrTy,                                     // p_lower
    733     PtrTy,                                     // p_upper
    734     PtrTy,                                     // p_stride
    735     ITy,                                       // incr
    736     ITy                                        // chunk
    737   };
    738   llvm::FunctionType *FnTy =
    739       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    740   return CGM.CreateRuntimeFunction(FnTy, Name);
    741 }
    742 
    743 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
    744                                                             bool IVSigned) {
    745   assert((IVSize == 32 || IVSize == 64) &&
    746          "IV size is not compatible with the omp runtime");
    747   auto Name =
    748       IVSize == 32
    749           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
    750           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
    751   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
    752   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
    753                                CGM.Int32Ty,           // tid
    754                                CGM.Int32Ty,           // schedtype
    755                                ITy,                   // lower
    756                                ITy,                   // upper
    757                                ITy,                   // stride
    758                                ITy                    // chunk
    759   };
    760   llvm::FunctionType *FnTy =
    761       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    762   return CGM.CreateRuntimeFunction(FnTy, Name);
    763 }
    764 
    765 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
    766                                                             bool IVSigned) {
    767   assert((IVSize == 32 || IVSize == 64) &&
    768          "IV size is not compatible with the omp runtime");
    769   auto Name =
    770       IVSize == 32
    771           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
    772           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
    773   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
    774   auto PtrTy = llvm::PointerType::getUnqual(ITy);
    775   llvm::Type *TypeParams[] = {
    776     getIdentTyPointerTy(),                     // loc
    777     CGM.Int32Ty,                               // tid
    778     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    779     PtrTy,                                     // p_lower
    780     PtrTy,                                     // p_upper
    781     PtrTy                                      // p_stride
    782   };
    783   llvm::FunctionType *FnTy =
    784       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    785   return CGM.CreateRuntimeFunction(FnTy, Name);
    786 }
    787 
    788 llvm::Constant *
    789 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
    790   // Lookup the entry, lazily creating it if necessary.
    791   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
    792                                      Twine(CGM.getMangledName(VD)) + ".cache.");
    793 }
    794 
    795 llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
    796                                                      const VarDecl *VD,
    797                                                      llvm::Value *VDAddr,
    798                                                      SourceLocation Loc) {
    799   auto VarTy = VDAddr->getType()->getPointerElementType();
    800   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
    801                          CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy),
    802                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
    803                          getOrCreateThreadPrivateCache(VD)};
    804   return CGF.EmitRuntimeCall(
    805       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
    806 }
    807 
    808 void CGOpenMPRuntime::emitThreadPrivateVarInit(
    809     CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor,
    810     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
    811   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
    812   // library.
    813   auto OMPLoc = emitUpdateLocation(CGF, Loc);
    814   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
    815                       OMPLoc);
    816   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
    817   // to register constructor/destructor for variable.
    818   llvm::Value *Args[] = {OMPLoc,
    819                          CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy),
    820                          Ctor, CopyCtor, Dtor};
    821   CGF.EmitRuntimeCall(
    822       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
    823 }
    824 
    825 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    826     const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc,
    827     bool PerformInit, CodeGenFunction *CGF) {
    828   VD = VD->getDefinition(CGM.getContext());
    829   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
    830     ThreadPrivateWithDefinition.insert(VD);
    831     QualType ASTTy = VD->getType();
    832 
    833     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    834     auto Init = VD->getAnyInitializer();
    835     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    836       // Generate function that re-emits the declaration's initializer into the
    837       // threadprivate copy of the variable VD
    838       CodeGenFunction CtorCGF(CGM);
    839       FunctionArgList Args;
    840       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
    841                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
    842       Args.push_back(&Dst);
    843 
    844       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
    845           CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
    846           /*isVariadic=*/false);
    847       auto FTy = CGM.getTypes().GetFunctionType(FI);
    848       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
    849           FTy, ".__kmpc_global_ctor_.", Loc);
    850       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
    851                             Args, SourceLocation());
    852       auto ArgVal = CtorCGF.EmitLoadOfScalar(
    853           CtorCGF.GetAddrOfLocalVar(&Dst),
    854           /*Volatile=*/false, CGM.PointerAlignInBytes,
    855           CGM.getContext().VoidPtrTy, Dst.getLocation());
    856       auto Arg = CtorCGF.Builder.CreatePointerCast(
    857           ArgVal,
    858           CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy)));
    859       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
    860                                /*IsInitializer=*/true);
    861       ArgVal = CtorCGF.EmitLoadOfScalar(
    862           CtorCGF.GetAddrOfLocalVar(&Dst),
    863           /*Volatile=*/false, CGM.PointerAlignInBytes,
    864           CGM.getContext().VoidPtrTy, Dst.getLocation());
    865       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
    866       CtorCGF.FinishFunction();
    867       Ctor = Fn;
    868     }
    869     if (VD->getType().isDestructedType() != QualType::DK_none) {
    870       // Generate function that emits destructor call for the threadprivate copy
    871       // of the variable VD
    872       CodeGenFunction DtorCGF(CGM);
    873       FunctionArgList Args;
    874       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
    875                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
    876       Args.push_back(&Dst);
    877 
    878       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
    879           CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
    880           /*isVariadic=*/false);
    881       auto FTy = CGM.getTypes().GetFunctionType(FI);
    882       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
    883           FTy, ".__kmpc_global_dtor_.", Loc);
    884       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
    885                             SourceLocation());
    886       auto ArgVal = DtorCGF.EmitLoadOfScalar(
    887           DtorCGF.GetAddrOfLocalVar(&Dst),
    888           /*Volatile=*/false, CGM.PointerAlignInBytes,
    889           CGM.getContext().VoidPtrTy, Dst.getLocation());
    890       DtorCGF.emitDestroy(ArgVal, ASTTy,
    891                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
    892                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
    893       DtorCGF.FinishFunction();
    894       Dtor = Fn;
    895     }
    896     // Do not emit init function if it is not required.
    897     if (!Ctor && !Dtor)
    898       return nullptr;
    899 
    900     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    901     auto CopyCtorTy =
    902         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
    903                                 /*isVarArg=*/false)->getPointerTo();
    904     // Copying constructor for the threadprivate variable.
    905     // Must be NULL - reserved by runtime, but currently it requires that this
    906     // parameter is always NULL. Otherwise it fires assertion.
    907     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    908     if (Ctor == nullptr) {
    909       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
    910                                             /*isVarArg=*/false)->getPointerTo();
    911       Ctor = llvm::Constant::getNullValue(CtorTy);
    912     }
    913     if (Dtor == nullptr) {
    914       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
    915                                             /*isVarArg=*/false)->getPointerTo();
    916       Dtor = llvm::Constant::getNullValue(DtorTy);
    917     }
    918     if (!CGF) {
    919       auto InitFunctionTy =
    920           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
    921       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
    922           InitFunctionTy, ".__omp_threadprivate_init_.");
    923       CodeGenFunction InitCGF(CGM);
    924       FunctionArgList ArgList;
    925       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
    926                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
    927                             Loc);
    928       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
    929       InitCGF.FinishFunction();
    930       return InitFunction;
    931     }
    932     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
    933   }
    934   return nullptr;
    935 }
    936 
    937 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
    938                                        llvm::Value *OutlinedFn,
    939                                        llvm::Value *CapturedStruct) {
    940   // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/)
    941   llvm::Value *Args[] = {
    942       emitUpdateLocation(CGF, Loc),
    943       CGF.Builder.getInt32(1), // Number of arguments after 'microtask' argument
    944       // (there is only one additional argument - 'context')
    945       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
    946       CGF.EmitCastToVoidPtr(CapturedStruct)};
    947   auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
    948   CGF.EmitRuntimeCall(RTLFn, Args);
    949 }
    950 
    951 void CGOpenMPRuntime::emitSerialCall(CodeGenFunction &CGF, SourceLocation Loc,
    952                                      llvm::Value *OutlinedFn,
    953                                      llvm::Value *CapturedStruct) {
    954   auto ThreadID = getThreadID(CGF, Loc);
    955   // Build calls:
    956   // __kmpc_serialized_parallel(&Loc, GTid);
    957   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), ThreadID};
    958   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
    959                       Args);
    960 
    961   // OutlinedFn(&GTid, &zero, CapturedStruct);
    962   auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
    963   auto Int32Ty =
    964       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
    965   auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
    966   CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    967   llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct};
    968   CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
    969 
    970   // __kmpc_end_serialized_parallel(&Loc, GTid);
    971   llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
    972   CGF.EmitRuntimeCall(
    973       createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
    974 }
    975 
    976 // If we're inside an (outlined) parallel region, use the region info's
    977 // thread-ID variable (it is passed in a first argument of the outlined function
    978 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
    979 // regular serial code region, get thread ID by calling kmp_int32
    980 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
    981 // return the address of that temp.
    982 llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
    983                                                   SourceLocation Loc) {
    984   if (auto OMPRegionInfo =
    985           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    986     if (OMPRegionInfo->getThreadIDVariable())
    987       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
    988 
    989   auto ThreadID = getThreadID(CGF, Loc);
    990   auto Int32Ty =
    991       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
    992   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
    993   CGF.EmitStoreOfScalar(ThreadID,
    994                         CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty));
    995 
    996   return ThreadIDTemp;
    997 }
    998 
    999 llvm::Constant *
   1000 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
   1001                                              const llvm::Twine &Name) {
   1002   SmallString<256> Buffer;
   1003   llvm::raw_svector_ostream Out(Buffer);
   1004   Out << Name;
   1005   auto RuntimeName = Out.str();
   1006   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
   1007   if (Elem.second) {
   1008     assert(Elem.second->getType()->getPointerElementType() == Ty &&
   1009            "OMP internal variable has different type than requested");
   1010     return &*Elem.second;
   1011   }
   1012 
   1013   return Elem.second = new llvm::GlobalVariable(
   1014              CGM.getModule(), Ty, /*IsConstant*/ false,
   1015              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
   1016              Elem.first());
   1017 }
   1018 
   1019 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
   1020   llvm::Twine Name(".gomp_critical_user_", CriticalName);
   1021   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
   1022 }
   1023 
   1024 namespace {
   1025 class CallEndCleanup : public EHScopeStack::Cleanup {
   1026 public:
   1027   typedef ArrayRef<llvm::Value *> CleanupValuesTy;
   1028 private:
   1029   llvm::Value *Callee;
   1030   llvm::SmallVector<llvm::Value *, 8> Args;
   1031 
   1032 public:
   1033   CallEndCleanup(llvm::Value *Callee, CleanupValuesTy Args)
   1034       : Callee(Callee), Args(Args.begin(), Args.end()) {}
   1035   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
   1036     CGF.EmitRuntimeCall(Callee, Args);
   1037   }
   1038 };
   1039 } // namespace
   1040 
   1041 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
   1042                                          StringRef CriticalName,
   1043                                          const RegionCodeGenTy &CriticalOpGen,
   1044                                          SourceLocation Loc) {
   1045   // __kmpc_critical(ident_t *, gtid, Lock);
   1046   // CriticalOpGen();
   1047   // __kmpc_end_critical(ident_t *, gtid, Lock);
   1048   // Prepare arguments and build a call to __kmpc_critical
   1049   {
   1050     CodeGenFunction::RunCleanupsScope Scope(CGF);
   1051     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
   1052                            getCriticalRegionLock(CriticalName)};
   1053     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
   1054     // Build a call to __kmpc_end_critical
   1055     CGF.EHStack.pushCleanup<CallEndCleanup>(
   1056         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
   1057         llvm::makeArrayRef(Args));
   1058     emitInlinedDirective(CGF, CriticalOpGen);
   1059   }
   1060 }
   1061 
   1062 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
   1063                        const RegionCodeGenTy &BodyOpGen) {
   1064   llvm::Value *CallBool = CGF.EmitScalarConversion(
   1065       IfCond,
   1066       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
   1067       CGF.getContext().BoolTy);
   1068 
   1069   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
   1070   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
   1071   // Generate the branch (If-stmt)
   1072   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
   1073   CGF.EmitBlock(ThenBlock);
   1074   CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, BodyOpGen);
   1075   // Emit the rest of bblocks/branches
   1076   CGF.EmitBranch(ContBlock);
   1077   CGF.EmitBlock(ContBlock, true);
   1078 }
   1079 
   1080 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
   1081                                        const RegionCodeGenTy &MasterOpGen,
   1082                                        SourceLocation Loc) {
   1083   // if(__kmpc_master(ident_t *, gtid)) {
   1084   //   MasterOpGen();
   1085   //   __kmpc_end_master(ident_t *, gtid);
   1086   // }
   1087   // Prepare arguments and build a call to __kmpc_master
   1088   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
   1089   auto *IsMaster =
   1090       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
   1091   emitIfStmt(CGF, IsMaster, [&](CodeGenFunction &CGF) -> void {
   1092     CodeGenFunction::RunCleanupsScope Scope(CGF);
   1093     CGF.EHStack.pushCleanup<CallEndCleanup>(
   1094         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
   1095         llvm::makeArrayRef(Args));
   1096     MasterOpGen(CGF);
   1097   });
   1098 }
   1099 
   1100 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
   1101                                         SourceLocation Loc) {
   1102   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
   1103   llvm::Value *Args[] = {
   1104       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
   1105       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
   1106   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
   1107 }
   1108 
   1109 static llvm::Value *emitCopyprivateCopyFunction(
   1110     CodeGenModule &CGM, llvm::Type *ArgsType,
   1111     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
   1112     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
   1113   auto &C = CGM.getContext();
   1114   // void copy_func(void *LHSArg, void *RHSArg);
   1115   FunctionArgList Args;
   1116   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
   1117                            C.VoidPtrTy);
   1118   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
   1119                            C.VoidPtrTy);
   1120   Args.push_back(&LHSArg);
   1121   Args.push_back(&RHSArg);
   1122   FunctionType::ExtInfo EI;
   1123   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
   1124       C.VoidTy, Args, EI, /*isVariadic=*/false);
   1125   auto *Fn = llvm::Function::Create(
   1126       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
   1127       ".omp.copyprivate.copy_func", &CGM.getModule());
   1128   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
   1129   CodeGenFunction CGF(CGM);
   1130   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
   1131   // Dest = (void*[n])(LHSArg);
   1132   // Src = (void*[n])(RHSArg);
   1133   auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   1134       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
   1135                                     CGF.PointerAlignInBytes),
   1136       ArgsType);
   1137   auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   1138       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
   1139                                     CGF.PointerAlignInBytes),
   1140       ArgsType);
   1141   // *(Type0*)Dst[0] = *(Type0*)Src[0];
   1142   // *(Type1*)Dst[1] = *(Type1*)Src[1];
   1143   // ...
   1144   // *(Typen*)Dst[n] = *(Typen*)Src[n];
   1145   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
   1146     auto *DestAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   1147         CGF.Builder.CreateAlignedLoad(
   1148             CGF.Builder.CreateStructGEP(nullptr, LHS, I),
   1149             CGM.PointerAlignInBytes),
   1150         CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
   1151     auto *SrcAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   1152         CGF.Builder.CreateAlignedLoad(
   1153             CGF.Builder.CreateStructGEP(nullptr, RHS, I),
   1154             CGM.PointerAlignInBytes),
   1155         CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
   1156     CGF.EmitOMPCopy(CGF, CopyprivateVars[I]->getType(), DestAddr, SrcAddr,
   1157                     cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()),
   1158                     cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()),
   1159                     AssignmentOps[I]);
   1160   }
   1161   CGF.FinishFunction();
   1162   return Fn;
   1163 }
   1164 
   1165 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
   1166                                        const RegionCodeGenTy &SingleOpGen,
   1167                                        SourceLocation Loc,
   1168                                        ArrayRef<const Expr *> CopyprivateVars,
   1169                                        ArrayRef<const Expr *> SrcExprs,
   1170                                        ArrayRef<const Expr *> DstExprs,
   1171                                        ArrayRef<const Expr *> AssignmentOps) {
   1172   assert(CopyprivateVars.size() == SrcExprs.size() &&
   1173          CopyprivateVars.size() == DstExprs.size() &&
   1174          CopyprivateVars.size() == AssignmentOps.size());
   1175   auto &C = CGM.getContext();
   1176   // int32 did_it = 0;
   1177   // if(__kmpc_single(ident_t *, gtid)) {
   1178   //   SingleOpGen();
   1179   //   __kmpc_end_single(ident_t *, gtid);
   1180   //   did_it = 1;
   1181   // }
   1182   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
   1183   // <copy_func>, did_it);
   1184 
   1185   llvm::AllocaInst *DidIt = nullptr;
   1186   if (!CopyprivateVars.empty()) {
   1187     // int32 did_it = 0;
   1188     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
   1189     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
   1190     CGF.InitTempAlloca(DidIt, CGF.Builder.getInt32(0));
   1191   }
   1192   // Prepare arguments and build a call to __kmpc_single
   1193   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
   1194   auto *IsSingle =
   1195       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
   1196   emitIfStmt(CGF, IsSingle, [&](CodeGenFunction &CGF) -> void {
   1197     CodeGenFunction::RunCleanupsScope Scope(CGF);
   1198     CGF.EHStack.pushCleanup<CallEndCleanup>(
   1199         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
   1200         llvm::makeArrayRef(Args));
   1201     SingleOpGen(CGF);
   1202     if (DidIt) {
   1203       // did_it = 1;
   1204       CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt,
   1205                                      DidIt->getAlignment());
   1206     }
   1207   });
   1208   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
   1209   // <copy_func>, did_it);
   1210   if (DidIt) {
   1211     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
   1212     auto CopyprivateArrayTy =
   1213         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
   1214                                /*IndexTypeQuals=*/0);
   1215     // Create a list of all private variables for copyprivate.
   1216     auto *CopyprivateList =
   1217         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
   1218     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
   1219       auto *Elem = CGF.Builder.CreateStructGEP(
   1220           CopyprivateList->getAllocatedType(), CopyprivateList, I);
   1221       CGF.Builder.CreateAlignedStore(
   1222           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   1223               CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy),
   1224           Elem, CGM.PointerAlignInBytes);
   1225     }
   1226     // Build function that copies private values from single region to all other
   1227     // threads in the corresponding parallel region.
   1228     auto *CpyFn = emitCopyprivateCopyFunction(
   1229         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
   1230         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
   1231     auto *BufSize = CGF.Builder.getInt32(
   1232         C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity());
   1233     auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
   1234                                                                CGF.VoidPtrTy);
   1235     auto *DidItVal =
   1236         CGF.Builder.CreateAlignedLoad(DidIt, CGF.PointerAlignInBytes);
   1237     llvm::Value *Args[] = {
   1238         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
   1239         getThreadID(CGF, Loc),        // i32 <gtid>
   1240         BufSize,                      // i32 <buf_size>
   1241         CL,                           // void *<copyprivate list>
   1242         CpyFn,                        // void (*) (void *, void *) <copy_func>
   1243         DidItVal                      // i32 did_it
   1244     };
   1245     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
   1246   }
   1247 }
   1248 
   1249 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
   1250                                       OpenMPDirectiveKind Kind) {
   1251   // Build call __kmpc_cancel_barrier(loc, thread_id);
   1252   OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
   1253   if (Kind == OMPD_for) {
   1254     Flags =
   1255         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
   1256   } else if (Kind == OMPD_sections) {
   1257     Flags = static_cast<OpenMPLocationFlags>(Flags |
   1258                                              OMP_IDENT_BARRIER_IMPL_SECTIONS);
   1259   } else if (Kind == OMPD_single) {
   1260     Flags =
   1261         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
   1262   } else if (Kind == OMPD_barrier) {
   1263     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL);
   1264   } else {
   1265     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
   1266   }
   1267   // Build call __kmpc_cancel_barrier(loc, thread_id);
   1268   // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this
   1269   // one provides the same functionality and adds initial support for
   1270   // cancellation constructs introduced in OpenMP 4.0. __kmpc_cancel_barrier()
   1271   // is provided default by the runtime library so it safe to make such
   1272   // replacement.
   1273   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
   1274                          getThreadID(CGF, Loc)};
   1275   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
   1276 }
   1277 
   1278 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from
   1279 /// the enum sched_type in kmp.h).
   1280 enum OpenMPSchedType {
   1281   /// \brief Lower bound for default (unordered) versions.
   1282   OMP_sch_lower = 32,
   1283   OMP_sch_static_chunked = 33,
   1284   OMP_sch_static = 34,
   1285   OMP_sch_dynamic_chunked = 35,
   1286   OMP_sch_guided_chunked = 36,
   1287   OMP_sch_runtime = 37,
   1288   OMP_sch_auto = 38,
   1289   /// \brief Lower bound for 'ordered' versions.
   1290   OMP_ord_lower = 64,
   1291   /// \brief Lower bound for 'nomerge' versions.
   1292   OMP_nm_lower = 160,
   1293 };
   1294 
   1295 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
   1296 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
   1297                                           bool Chunked) {
   1298   switch (ScheduleKind) {
   1299   case OMPC_SCHEDULE_static:
   1300     return Chunked ? OMP_sch_static_chunked : OMP_sch_static;
   1301   case OMPC_SCHEDULE_dynamic:
   1302     return OMP_sch_dynamic_chunked;
   1303   case OMPC_SCHEDULE_guided:
   1304     return OMP_sch_guided_chunked;
   1305   case OMPC_SCHEDULE_auto:
   1306     return OMP_sch_auto;
   1307   case OMPC_SCHEDULE_runtime:
   1308     return OMP_sch_runtime;
   1309   case OMPC_SCHEDULE_unknown:
   1310     assert(!Chunked && "chunk was specified but schedule kind not known");
   1311     return OMP_sch_static;
   1312   }
   1313   llvm_unreachable("Unexpected runtime schedule");
   1314 }
   1315 
   1316 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
   1317                                          bool Chunked) const {
   1318   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
   1319   return Schedule == OMP_sch_static;
   1320 }
   1321 
   1322 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
   1323   auto Schedule = getRuntimeSchedule(ScheduleKind, /* Chunked */ false);
   1324   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
   1325   return Schedule != OMP_sch_static;
   1326 }
   1327 
   1328 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
   1329                                   OpenMPScheduleClauseKind ScheduleKind,
   1330                                   unsigned IVSize, bool IVSigned,
   1331                                   llvm::Value *IL, llvm::Value *LB,
   1332                                   llvm::Value *UB, llvm::Value *ST,
   1333                                   llvm::Value *Chunk) {
   1334   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr);
   1335   if (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked) {
   1336     // Call __kmpc_dispatch_init(
   1337     //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
   1338     //          kmp_int[32|64] lower, kmp_int[32|64] upper,
   1339     //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
   1340 
   1341     // If the Chunk was not specified in the clause - use default value 1.
   1342     if (Chunk == nullptr)
   1343       Chunk = CGF.Builder.getIntN(IVSize, 1);
   1344     llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
   1345                             getThreadID(CGF, Loc),
   1346                             CGF.Builder.getInt32(Schedule), // Schedule type
   1347                             CGF.Builder.getIntN(IVSize, 0), // Lower
   1348                             UB,                             // Upper
   1349                             CGF.Builder.getIntN(IVSize, 1), // Stride
   1350                             Chunk                           // Chunk
   1351     };
   1352     CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
   1353   } else {
   1354     // Call __kmpc_for_static_init(
   1355     //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
   1356     //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
   1357     //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
   1358     //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
   1359     if (Chunk == nullptr) {
   1360       assert(Schedule == OMP_sch_static &&
   1361              "expected static non-chunked schedule");
   1362       // If the Chunk was not specified in the clause - use default value 1.
   1363       Chunk = CGF.Builder.getIntN(IVSize, 1);
   1364     } else
   1365       assert(Schedule == OMP_sch_static_chunked &&
   1366              "expected static chunked schedule");
   1367     llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
   1368                             getThreadID(CGF, Loc),
   1369                             CGF.Builder.getInt32(Schedule), // Schedule type
   1370                             IL,                             // &isLastIter
   1371                             LB,                             // &LB
   1372                             UB,                             // &UB
   1373                             ST,                             // &Stride
   1374                             CGF.Builder.getIntN(IVSize, 1), // Incr
   1375                             Chunk                           // Chunk
   1376     };
   1377     CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
   1378   }
   1379 }
   1380 
   1381 void CGOpenMPRuntime::emitForFinish(CodeGenFunction &CGF, SourceLocation Loc,
   1382                                     OpenMPScheduleClauseKind ScheduleKind) {
   1383   assert((ScheduleKind == OMPC_SCHEDULE_static ||
   1384           ScheduleKind == OMPC_SCHEDULE_unknown) &&
   1385          "Non-static schedule kinds are not yet implemented");
   1386   (void)ScheduleKind;
   1387   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
   1388   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
   1389                          getThreadID(CGF, Loc)};
   1390   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
   1391                       Args);
   1392 }
   1393 
   1394 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
   1395                                           SourceLocation Loc, unsigned IVSize,
   1396                                           bool IVSigned, llvm::Value *IL,
   1397                                           llvm::Value *LB, llvm::Value *UB,
   1398                                           llvm::Value *ST) {
   1399   // Call __kmpc_dispatch_next(
   1400   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
   1401   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
   1402   //          kmp_int[32|64] *p_stride);
   1403   llvm::Value *Args[] = {
   1404       emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
   1405       IL, // &isLastIter
   1406       LB, // &Lower
   1407       UB, // &Upper
   1408       ST  // &Stride
   1409   };
   1410   llvm::Value *Call =
   1411       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
   1412   return CGF.EmitScalarConversion(
   1413       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
   1414       CGF.getContext().BoolTy);
   1415 }
   1416 
   1417 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
   1418                                            llvm::Value *NumThreads,
   1419                                            SourceLocation Loc) {
   1420   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
   1421   llvm::Value *Args[] = {
   1422       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
   1423       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
   1424   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
   1425                       Args);
   1426 }
   1427 
   1428 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
   1429                                 SourceLocation Loc) {
   1430   // Build call void __kmpc_flush(ident_t *loc)
   1431   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
   1432                       emitUpdateLocation(CGF, Loc));
   1433 }
   1434 
   1435 namespace {
   1436 /// \brief Indexes of fields for type kmp_task_t.
   1437 enum KmpTaskTFields {
   1438   /// \brief List of shared variables.
   1439   KmpTaskTShareds,
   1440   /// \brief Task routine.
   1441   KmpTaskTRoutine,
   1442   /// \brief Partition id for the untied tasks.
   1443   KmpTaskTPartId,
   1444   /// \brief Function with call of destructors for private variables.
   1445   KmpTaskTDestructors,
   1446 };
   1447 } // namespace
   1448 
   1449 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
   1450   if (!KmpRoutineEntryPtrTy) {
   1451     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
   1452     auto &C = CGM.getContext();
   1453     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
   1454     FunctionProtoType::ExtProtoInfo EPI;
   1455     KmpRoutineEntryPtrQTy = C.getPointerType(
   1456         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
   1457     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
   1458   }
   1459 }
   1460 
   1461 static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
   1462                                  QualType FieldTy) {
   1463   auto *Field = FieldDecl::Create(
   1464       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
   1465       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
   1466       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
   1467   Field->setAccess(AS_public);
   1468   DC->addDecl(Field);
   1469 }
   1470 
   1471 static QualType createKmpTaskTRecordDecl(CodeGenModule &CGM,
   1472                                          QualType KmpInt32Ty,
   1473                                          QualType KmpRoutineEntryPointerQTy) {
   1474   auto &C = CGM.getContext();
   1475   // Build struct kmp_task_t {
   1476   //         void *              shareds;
   1477   //         kmp_routine_entry_t routine;
   1478   //         kmp_int32           part_id;
   1479   //         kmp_routine_entry_t destructors;
   1480   //         /*  private vars  */
   1481   //       };
   1482   auto *RD = C.buildImplicitRecord("kmp_task_t");
   1483   RD->startDefinition();
   1484   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
   1485   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
   1486   addFieldToRecordDecl(C, RD, KmpInt32Ty);
   1487   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
   1488   // TODO: add private fields.
   1489   RD->completeDefinition();
   1490   return C.getRecordType(RD);
   1491 }
   1492 
   1493 /// \brief Emit a proxy function which accepts kmp_task_t as the second
   1494 /// argument.
   1495 /// \code
   1496 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
   1497 ///   TaskFunction(gtid, tt->part_id, tt->shareds);
   1498 ///   return 0;
   1499 /// }
   1500 /// \endcode
   1501 static llvm::Value *
   1502 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
   1503                       QualType KmpInt32Ty, QualType KmpTaskTPtrQTy,
   1504                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
   1505                       llvm::Type *KmpTaskTTy) {
   1506   auto &C = CGM.getContext();
   1507   FunctionArgList Args;
   1508   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
   1509   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
   1510                                 /*Id=*/nullptr, KmpTaskTPtrQTy);
   1511   Args.push_back(&GtidArg);
   1512   Args.push_back(&TaskTypeArg);
   1513   FunctionType::ExtInfo Info;
   1514   auto &TaskEntryFnInfo =
   1515       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
   1516                                                     /*isVariadic=*/false);
   1517   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
   1518   auto *TaskEntry =
   1519       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
   1520                              ".omp_task_entry.", &CGM.getModule());
   1521   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry);
   1522   CodeGenFunction CGF(CGM);
   1523   CGF.disableDebugInfo();
   1524   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
   1525 
   1526   // TaskFunction(gtid, tt->part_id, tt->shareds);
   1527   auto *GtidParam = CGF.EmitLoadOfScalar(
   1528       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false,
   1529       C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
   1530   auto TaskTypeArgAddr = CGF.EmitLoadOfScalar(
   1531       CGF.GetAddrOfLocalVar(&TaskTypeArg), /*Volatile=*/false,
   1532       CGM.PointerAlignInBytes, KmpTaskTPtrQTy, Loc);
   1533   auto *PartidPtr = CGF.Builder.CreateStructGEP(KmpTaskTTy, TaskTypeArgAddr,
   1534                                                 /*Idx=*/KmpTaskTPartId);
   1535   auto *PartidParam = CGF.EmitLoadOfScalar(
   1536       PartidPtr, /*Volatile=*/false,
   1537       C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
   1538   auto *SharedsPtr = CGF.Builder.CreateStructGEP(KmpTaskTTy, TaskTypeArgAddr,
   1539                                                  /*Idx=*/KmpTaskTShareds);
   1540   auto *SharedsParam =
   1541       CGF.EmitLoadOfScalar(SharedsPtr, /*Volatile=*/false,
   1542                            CGM.PointerAlignInBytes, C.VoidPtrTy, Loc);
   1543   llvm::Value *CallArgs[] = {
   1544       GtidParam, PartidParam,
   1545       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   1546           SharedsParam, CGF.ConvertTypeForMem(SharedsPtrTy))};
   1547   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
   1548   CGF.EmitStoreThroughLValue(
   1549       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
   1550       CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty));
   1551   CGF.FinishFunction();
   1552   return TaskEntry;
   1553 }
   1554 
   1555 void CGOpenMPRuntime::emitTaskCall(
   1556     CodeGenFunction &CGF, SourceLocation Loc, bool Tied,
   1557     llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
   1558     llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds) {
   1559   auto &C = CGM.getContext();
   1560   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
   1561   // Build type kmp_routine_entry_t (if not built yet).
   1562   emitKmpRoutineEntryT(KmpInt32Ty);
   1563   // Build particular struct kmp_task_t for the given task.
   1564   auto KmpTaskQTy =
   1565       createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy);
   1566   QualType KmpTaskTPtrQTy = C.getPointerType(KmpTaskQTy);
   1567   auto *KmpTaskTTy = CGF.ConvertType(KmpTaskQTy);
   1568   auto *KmpTaskTPtrTy = KmpTaskTTy->getPointerTo();
   1569   auto KmpTaskTySize = CGM.getSize(C.getTypeSizeInChars(KmpTaskQTy));
   1570   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
   1571 
   1572   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
   1573   // kmp_task_t *tt);
   1574   auto *TaskEntry =
   1575       emitProxyTaskFunction(CGM, Loc, KmpInt32Ty, KmpTaskTPtrQTy, SharedsPtrTy,
   1576                             TaskFunction, KmpTaskTTy);
   1577 
   1578   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
   1579   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
   1580   // kmp_routine_entry_t *task_entry);
   1581   // Task flags. Format is taken from
   1582   // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
   1583   // description of kmp_tasking_flags struct.
   1584   const unsigned TiedFlag = 0x1;
   1585   const unsigned FinalFlag = 0x2;
   1586   unsigned Flags = Tied ? TiedFlag : 0;
   1587   auto *TaskFlags =
   1588       Final.getPointer()
   1589           ? CGF.Builder.CreateSelect(Final.getPointer(),
   1590                                      CGF.Builder.getInt32(FinalFlag),
   1591                                      CGF.Builder.getInt32(/*C=*/0))
   1592           : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
   1593   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
   1594   auto SharedsSize = C.getTypeSizeInChars(SharedsTy);
   1595   llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
   1596                               getThreadID(CGF, Loc), TaskFlags, KmpTaskTySize,
   1597                               CGM.getSize(SharedsSize),
   1598                               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   1599                                   TaskEntry, KmpRoutineEntryPtrTy)};
   1600   auto *NewTask = CGF.EmitRuntimeCall(
   1601       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
   1602   auto *NewTaskNewTaskTTy =
   1603       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(NewTask, KmpTaskTPtrTy);
   1604   // Fill the data in the resulting kmp_task_t record.
   1605   // Copy shareds if there are any.
   1606   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty())
   1607     CGF.EmitAggregateCopy(
   1608         CGF.EmitLoadOfScalar(
   1609             CGF.Builder.CreateStructGEP(KmpTaskTTy, NewTaskNewTaskTTy,
   1610                                         /*Idx=*/KmpTaskTShareds),
   1611             /*Volatile=*/false, CGM.PointerAlignInBytes, SharedsPtrTy, Loc),
   1612         Shareds, SharedsTy);
   1613   // TODO: generate function with destructors for privates.
   1614   // Provide pointer to function with destructors for privates.
   1615   CGF.Builder.CreateAlignedStore(
   1616       llvm::ConstantPointerNull::get(
   1617           cast<llvm::PointerType>(KmpRoutineEntryPtrTy)),
   1618       CGF.Builder.CreateStructGEP(KmpTaskTTy, NewTaskNewTaskTTy,
   1619                                   /*Idx=*/KmpTaskTDestructors),
   1620       CGM.PointerAlignInBytes);
   1621 
   1622   // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
   1623   // libcall.
   1624   // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
   1625   // *new_task);
   1626   llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc),
   1627                              getThreadID(CGF, Loc), NewTask};
   1628   // TODO: add check for untied tasks.
   1629   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
   1630 }
   1631 
   1632 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
   1633                                           llvm::Type *ArgsType,
   1634                                           ArrayRef<const Expr *> LHSExprs,
   1635                                           ArrayRef<const Expr *> RHSExprs,
   1636                                           ArrayRef<const Expr *> ReductionOps) {
   1637   auto &C = CGM.getContext();
   1638 
   1639   // void reduction_func(void *LHSArg, void *RHSArg);
   1640   FunctionArgList Args;
   1641   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
   1642                            C.VoidPtrTy);
   1643   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
   1644                            C.VoidPtrTy);
   1645   Args.push_back(&LHSArg);
   1646   Args.push_back(&RHSArg);
   1647   FunctionType::ExtInfo EI;
   1648   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
   1649       C.VoidTy, Args, EI, /*isVariadic=*/false);
   1650   auto *Fn = llvm::Function::Create(
   1651       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
   1652       ".omp.reduction.reduction_func", &CGM.getModule());
   1653   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
   1654   CodeGenFunction CGF(CGM);
   1655   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
   1656 
   1657   // Dst = (void*[n])(LHSArg);
   1658   // Src = (void*[n])(RHSArg);
   1659   auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   1660       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
   1661                                     CGF.PointerAlignInBytes),
   1662       ArgsType);
   1663   auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   1664       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
   1665                                     CGF.PointerAlignInBytes),
   1666       ArgsType);
   1667 
   1668   //  ...
   1669   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
   1670   //  ...
   1671   CodeGenFunction::OMPPrivateScope Scope(CGF);
   1672   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) {
   1673     Scope.addPrivate(
   1674         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()),
   1675         [&]() -> llvm::Value *{
   1676           return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   1677               CGF.Builder.CreateAlignedLoad(
   1678                   CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, RHS, I),
   1679                   CGM.PointerAlignInBytes),
   1680               CGF.ConvertTypeForMem(C.getPointerType(RHSExprs[I]->getType())));
   1681         });
   1682     Scope.addPrivate(
   1683         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()),
   1684         [&]() -> llvm::Value *{
   1685           return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   1686               CGF.Builder.CreateAlignedLoad(
   1687                   CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, LHS, I),
   1688                   CGM.PointerAlignInBytes),
   1689               CGF.ConvertTypeForMem(C.getPointerType(LHSExprs[I]->getType())));
   1690         });
   1691   }
   1692   Scope.Privatize();
   1693   for (auto *E : ReductionOps) {
   1694     CGF.EmitIgnoredExpr(E);
   1695   }
   1696   Scope.ForceCleanup();
   1697   CGF.FinishFunction();
   1698   return Fn;
   1699 }
   1700 
   1701 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
   1702                                     ArrayRef<const Expr *> LHSExprs,
   1703                                     ArrayRef<const Expr *> RHSExprs,
   1704                                     ArrayRef<const Expr *> ReductionOps,
   1705                                     bool WithNowait) {
   1706   // Next code should be emitted for reduction:
   1707   //
   1708   // static kmp_critical_name lock = { 0 };
   1709   //
   1710   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
   1711   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
   1712   //  ...
   1713   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
   1714   //  *(Type<n>-1*)rhs[<n>-1]);
   1715   // }
   1716   //
   1717   // ...
   1718   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
   1719   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
   1720   // RedList, reduce_func, &<lock>)) {
   1721   // case 1:
   1722   //  ...
   1723   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
   1724   //  ...
   1725   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
   1726   // break;
   1727   // case 2:
   1728   //  ...
   1729   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
   1730   //  ...
   1731   // break;
   1732   // default:;
   1733   // }
   1734 
   1735   auto &C = CGM.getContext();
   1736 
   1737   // 1. Build a list of reduction variables.
   1738   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
   1739   llvm::APInt ArraySize(/*unsigned int numBits=*/32, RHSExprs.size());
   1740   QualType ReductionArrayTy =
   1741       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
   1742                              /*IndexTypeQuals=*/0);
   1743   auto *ReductionList =
   1744       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
   1745   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) {
   1746     auto *Elem = CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, ReductionList, I);
   1747     CGF.Builder.CreateAlignedStore(
   1748         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   1749             CGF.EmitLValue(RHSExprs[I]).getAddress(), CGF.VoidPtrTy),
   1750         Elem, CGM.PointerAlignInBytes);
   1751   }
   1752 
   1753   // 2. Emit reduce_func().
   1754   auto *ReductionFn = emitReductionFunction(
   1755       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), LHSExprs,
   1756       RHSExprs, ReductionOps);
   1757 
   1758   // 3. Create static kmp_critical_name lock = { 0 };
   1759   auto *Lock = getCriticalRegionLock(".reduction");
   1760 
   1761   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
   1762   // RedList, reduce_func, &<lock>);
   1763   auto *IdentTLoc = emitUpdateLocation(
   1764       CGF, Loc,
   1765       static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
   1766   auto *ThreadId = getThreadID(CGF, Loc);
   1767   auto *ReductionArrayTySize = llvm::ConstantInt::get(
   1768       CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity());
   1769   auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList,
   1770                                                              CGF.VoidPtrTy);
   1771   llvm::Value *Args[] = {
   1772       IdentTLoc,                             // ident_t *<loc>
   1773       ThreadId,                              // i32 <gtid>
   1774       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
   1775       ReductionArrayTySize,                  // size_type sizeof(RedList)
   1776       RL,                                    // void *RedList
   1777       ReductionFn, // void (*) (void *, void *) <reduce_func>
   1778       Lock         // kmp_critical_name *&<lock>
   1779   };
   1780   auto Res = CGF.EmitRuntimeCall(
   1781       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
   1782                                        : OMPRTL__kmpc_reduce),
   1783       Args);
   1784 
   1785   // 5. Build switch(res)
   1786   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
   1787   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
   1788 
   1789   // 6. Build case 1:
   1790   //  ...
   1791   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
   1792   //  ...
   1793   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
   1794   // break;
   1795   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
   1796   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
   1797   CGF.EmitBlock(Case1BB);
   1798 
   1799   {
   1800     CodeGenFunction::RunCleanupsScope Scope(CGF);
   1801     // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
   1802     llvm::Value *EndArgs[] = {
   1803         IdentTLoc, // ident_t *<loc>
   1804         ThreadId,  // i32 <gtid>
   1805         Lock       // kmp_critical_name *&<lock>
   1806     };
   1807     CGF.EHStack.pushCleanup<CallEndCleanup>(
   1808         NormalAndEHCleanup,
   1809         createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
   1810                                          : OMPRTL__kmpc_end_reduce),
   1811         llvm::makeArrayRef(EndArgs));
   1812     for (auto *E : ReductionOps) {
   1813       CGF.EmitIgnoredExpr(E);
   1814     }
   1815   }
   1816 
   1817   CGF.EmitBranch(DefaultBB);
   1818 
   1819   // 7. Build case 2:
   1820   //  ...
   1821   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
   1822   //  ...
   1823   // break;
   1824   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
   1825   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
   1826   CGF.EmitBlock(Case2BB);
   1827 
   1828   {
   1829     CodeGenFunction::RunCleanupsScope Scope(CGF);
   1830     auto I = LHSExprs.begin();
   1831     for (auto *E : ReductionOps) {
   1832       const Expr *XExpr = nullptr;
   1833       const Expr *EExpr = nullptr;
   1834       const Expr *UpExpr = nullptr;
   1835       BinaryOperatorKind BO = BO_Comma;
   1836       // Try to emit update expression as a simple atomic.
   1837       if (auto *ACO = dyn_cast<AbstractConditionalOperator>(E)) {
   1838         // If this is a conditional operator, analyze it's condition for
   1839         // min/max reduction operator.
   1840         E = ACO->getCond();
   1841       }
   1842       if (auto *BO = dyn_cast<BinaryOperator>(E)) {
   1843         if (BO->getOpcode() == BO_Assign) {
   1844           XExpr = BO->getLHS();
   1845           UpExpr = BO->getRHS();
   1846         }
   1847       }
   1848       // Analyze RHS part of the whole expression.
   1849       if (UpExpr) {
   1850         if (auto *BORHS =
   1851                 dyn_cast<BinaryOperator>(UpExpr->IgnoreParenImpCasts())) {
   1852           EExpr = BORHS->getRHS();
   1853           BO = BORHS->getOpcode();
   1854         }
   1855       }
   1856       if (XExpr) {
   1857         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
   1858         LValue X = CGF.EmitLValue(XExpr);
   1859         RValue E;
   1860         if (EExpr)
   1861           E = CGF.EmitAnyExpr(EExpr);
   1862         CGF.EmitOMPAtomicSimpleUpdateExpr(
   1863             X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
   1864             [&CGF, UpExpr, VD](RValue XRValue) {
   1865               CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
   1866               PrivateScope.addPrivate(
   1867                   VD, [&CGF, VD, XRValue]() -> llvm::Value *{
   1868                     auto *LHSTemp = CGF.CreateMemTemp(VD->getType());
   1869                     CGF.EmitStoreThroughLValue(
   1870                         XRValue,
   1871                         CGF.MakeNaturalAlignAddrLValue(LHSTemp, VD->getType()));
   1872                     return LHSTemp;
   1873                   });
   1874               (void)PrivateScope.Privatize();
   1875               return CGF.EmitAnyExpr(UpExpr);
   1876             });
   1877       } else {
   1878         // Emit as a critical region.
   1879         emitCriticalRegion(CGF, ".atomic_reduction", [E](CodeGenFunction &CGF) {
   1880           CGF.EmitIgnoredExpr(E);
   1881         }, Loc);
   1882       }
   1883       ++I;
   1884     }
   1885   }
   1886 
   1887   CGF.EmitBranch(DefaultBB);
   1888   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
   1889 }
   1890 
   1891 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
   1892                                            const RegionCodeGenTy &CodeGen) {
   1893   InlinedOpenMPRegionRAII Region(CGF, CodeGen);
   1894   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
   1895 }
   1896 
   1897