Home | History | Annotate | Download | only in CodeGen
      1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This provides a class for OpenMP runtime code generation.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "CGOpenMPRuntime.h"
     15 #include "CodeGenFunction.h"
     16 #include "CGCleanup.h"
     17 #include "clang/AST/Decl.h"
     18 #include "clang/AST/StmtOpenMP.h"
     19 #include "llvm/ADT/ArrayRef.h"
     20 #include "llvm/IR/CallSite.h"
     21 #include "llvm/IR/DerivedTypes.h"
     22 #include "llvm/IR/GlobalValue.h"
     23 #include "llvm/IR/Value.h"
     24 #include "llvm/Support/raw_ostream.h"
     25 #include <cassert>
     26 
     27 using namespace clang;
     28 using namespace CodeGen;
     29 
     30 namespace {
     31 /// \brief Base class for handling code generation inside OpenMP regions.
     32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
     33 public:
     34   /// \brief Kinds of OpenMP regions used in codegen.
     35   enum CGOpenMPRegionKind {
     36     /// \brief Region with outlined function for standalone 'parallel'
     37     /// directive.
     38     ParallelOutlinedRegion,
     39     /// \brief Region with outlined function for standalone 'task' directive.
     40     TaskOutlinedRegion,
     41     /// \brief Region for constructs that do not require function outlining,
     42     /// like 'for', 'sections', 'atomic' etc. directives.
     43     InlinedRegion,
     44     /// \brief Region with outlined function for standalone 'target' directive.
     45     TargetRegion,
     46   };
     47 
     48   CGOpenMPRegionInfo(const CapturedStmt &CS,
     49                      const CGOpenMPRegionKind RegionKind,
     50                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
     51                      bool HasCancel)
     52       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
     53         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
     54 
     55   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
     56                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
     57                      bool HasCancel)
     58       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
     59         Kind(Kind), HasCancel(HasCancel) {}
     60 
     61   /// \brief Get a variable or parameter for storing global thread id
     62   /// inside OpenMP construct.
     63   virtual const VarDecl *getThreadIDVariable() const = 0;
     64 
     65   /// \brief Emit the captured statement body.
     66   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
     67 
     68   /// \brief Get an LValue for the current ThreadID variable.
     69   /// \return LValue for thread id variable. This LValue always has type int32*.
     70   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
     71 
     72   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
     73 
     74   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
     75 
     76   bool hasCancel() const { return HasCancel; }
     77 
     78   static bool classof(const CGCapturedStmtInfo *Info) {
     79     return Info->getKind() == CR_OpenMP;
     80   }
     81 
     82 protected:
     83   CGOpenMPRegionKind RegionKind;
     84   const RegionCodeGenTy &CodeGen;
     85   OpenMPDirectiveKind Kind;
     86   bool HasCancel;
     87 };
     88 
     89 /// \brief API for captured statement code generation in OpenMP constructs.
     90 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
     91 public:
     92   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
     93                              const RegionCodeGenTy &CodeGen,
     94                              OpenMPDirectiveKind Kind, bool HasCancel)
     95       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
     96                            HasCancel),
     97         ThreadIDVar(ThreadIDVar) {
     98     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
     99   }
    100   /// \brief Get a variable or parameter for storing global thread id
    101   /// inside OpenMP construct.
    102   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
    103 
    104   /// \brief Get the name of the capture helper.
    105   StringRef getHelperName() const override { return ".omp_outlined."; }
    106 
    107   static bool classof(const CGCapturedStmtInfo *Info) {
    108     return CGOpenMPRegionInfo::classof(Info) &&
    109            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
    110                ParallelOutlinedRegion;
    111   }
    112 
    113 private:
    114   /// \brief A variable or parameter storing global thread id for OpenMP
    115   /// constructs.
    116   const VarDecl *ThreadIDVar;
    117 };
    118 
    119 /// \brief API for captured statement code generation in OpenMP constructs.
    120 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
    121 public:
    122   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
    123                                  const VarDecl *ThreadIDVar,
    124                                  const RegionCodeGenTy &CodeGen,
    125                                  OpenMPDirectiveKind Kind, bool HasCancel)
    126       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
    127         ThreadIDVar(ThreadIDVar) {
    128     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
    129   }
    130   /// \brief Get a variable or parameter for storing global thread id
    131   /// inside OpenMP construct.
    132   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
    133 
    134   /// \brief Get an LValue for the current ThreadID variable.
    135   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
    136 
    137   /// \brief Get the name of the capture helper.
    138   StringRef getHelperName() const override { return ".omp_outlined."; }
    139 
    140   static bool classof(const CGCapturedStmtInfo *Info) {
    141     return CGOpenMPRegionInfo::classof(Info) &&
    142            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
    143                TaskOutlinedRegion;
    144   }
    145 
    146 private:
    147   /// \brief A variable or parameter storing global thread id for OpenMP
    148   /// constructs.
    149   const VarDecl *ThreadIDVar;
    150 };
    151 
    152 /// \brief API for inlined captured statement code generation in OpenMP
    153 /// constructs.
    154 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
    155 public:
    156   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
    157                             const RegionCodeGenTy &CodeGen,
    158                             OpenMPDirectiveKind Kind, bool HasCancel)
    159       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
    160         OldCSI(OldCSI),
    161         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
    162   // \brief Retrieve the value of the context parameter.
    163   llvm::Value *getContextValue() const override {
    164     if (OuterRegionInfo)
    165       return OuterRegionInfo->getContextValue();
    166     llvm_unreachable("No context value for inlined OpenMP region");
    167   }
    168   void setContextValue(llvm::Value *V) override {
    169     if (OuterRegionInfo) {
    170       OuterRegionInfo->setContextValue(V);
    171       return;
    172     }
    173     llvm_unreachable("No context value for inlined OpenMP region");
    174   }
    175   /// \brief Lookup the captured field decl for a variable.
    176   const FieldDecl *lookup(const VarDecl *VD) const override {
    177     if (OuterRegionInfo)
    178       return OuterRegionInfo->lookup(VD);
    179     // If there is no outer outlined region,no need to lookup in a list of
    180     // captured variables, we can use the original one.
    181     return nullptr;
    182   }
    183   FieldDecl *getThisFieldDecl() const override {
    184     if (OuterRegionInfo)
    185       return OuterRegionInfo->getThisFieldDecl();
    186     return nullptr;
    187   }
    188   /// \brief Get a variable or parameter for storing global thread id
    189   /// inside OpenMP construct.
    190   const VarDecl *getThreadIDVariable() const override {
    191     if (OuterRegionInfo)
    192       return OuterRegionInfo->getThreadIDVariable();
    193     return nullptr;
    194   }
    195 
    196   /// \brief Get the name of the capture helper.
    197   StringRef getHelperName() const override {
    198     if (auto *OuterRegionInfo = getOldCSI())
    199       return OuterRegionInfo->getHelperName();
    200     llvm_unreachable("No helper name for inlined OpenMP construct");
    201   }
    202 
    203   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
    204 
    205   static bool classof(const CGCapturedStmtInfo *Info) {
    206     return CGOpenMPRegionInfo::classof(Info) &&
    207            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
    208   }
    209 
    210 private:
    211   /// \brief CodeGen info about outer OpenMP region.
    212   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
    213   CGOpenMPRegionInfo *OuterRegionInfo;
    214 };
    215 
    216 /// \brief API for captured statement code generation in OpenMP target
    217 /// constructs. For this captures, implicit parameters are used instead of the
    218 /// captured fields.
    219 class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo {
    220 public:
    221   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
    222                            const RegionCodeGenTy &CodeGen)
    223       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
    224                            /*HasCancel = */ false) {}
    225 
    226   /// \brief This is unused for target regions because each starts executing
    227   /// with a single thread.
    228   const VarDecl *getThreadIDVariable() const override { return nullptr; }
    229 
    230   /// \brief Get the name of the capture helper.
    231   StringRef getHelperName() const override { return ".omp_offloading."; }
    232 
    233   static bool classof(const CGCapturedStmtInfo *Info) {
    234     return CGOpenMPRegionInfo::classof(Info) &&
    235            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
    236   }
    237 };
    238 
    239 /// \brief RAII for emitting code of OpenMP constructs.
    240 class InlinedOpenMPRegionRAII {
    241   CodeGenFunction &CGF;
    242 
    243 public:
    244   /// \brief Constructs region for combined constructs.
    245   /// \param CodeGen Code generation sequence for combined directives. Includes
    246   /// a list of functions used for code generation of implicitly inlined
    247   /// regions.
    248   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
    249                           OpenMPDirectiveKind Kind, bool HasCancel)
    250       : CGF(CGF) {
    251     // Start emission for the construct.
    252     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
    253         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    254   }
    255   ~InlinedOpenMPRegionRAII() {
    256     // Restore original CapturedStmtInfo only if we're done with code emission.
    257     auto *OldCSI =
    258         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    259     delete CGF.CapturedStmtInfo;
    260     CGF.CapturedStmtInfo = OldCSI;
    261   }
    262 };
    263 
    264 } // anonymous namespace
    265 
    266 static LValue emitLoadOfPointerLValue(CodeGenFunction &CGF, Address PtrAddr,
    267                                       QualType Ty) {
    268   AlignmentSource Source;
    269   CharUnits Align = CGF.getNaturalPointeeTypeAlignment(Ty, &Source);
    270   return CGF.MakeAddrLValue(Address(CGF.Builder.CreateLoad(PtrAddr), Align),
    271                             Ty->getPointeeType(), Source);
    272 }
    273 
    274 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
    275   return emitLoadOfPointerLValue(CGF,
    276                                  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
    277                                  getThreadIDVariable()->getType());
    278 }
    279 
    280 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
    281   if (!CGF.HaveInsertPoint())
    282     return;
    283   // 1.2.2 OpenMP Language Terminology
    284   // Structured block - An executable statement with a single entry at the
    285   // top and a single exit at the bottom.
    286   // The point of exit cannot be a branch out of the structured block.
    287   // longjmp() and throw() must not violate the entry/exit criteria.
    288   CGF.EHStack.pushTerminate();
    289   {
    290     CodeGenFunction::RunCleanupsScope Scope(CGF);
    291     CodeGen(CGF);
    292   }
    293   CGF.EHStack.popTerminate();
    294 }
    295 
    296 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    297     CodeGenFunction &CGF) {
    298   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
    299                             getThreadIDVariable()->getType(),
    300                             AlignmentSource::Decl);
    301 }
    302 
    303 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    304     : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
    305   IdentTy = llvm::StructType::create(
    306       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
    307       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
    308       CGM.Int8PtrTy /* psource */, nullptr);
    309   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    310   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
    311                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
    312   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
    313   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
    314 }
    315 
    316 void CGOpenMPRuntime::clear() {
    317   InternalVars.clear();
    318 }
    319 
    320 // Layout information for ident_t.
    321 static CharUnits getIdentAlign(CodeGenModule &CGM) {
    322   return CGM.getPointerAlign();
    323 }
    324 static CharUnits getIdentSize(CodeGenModule &CGM) {
    325   assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
    326   return CharUnits::fromQuantity(16) + CGM.getPointerSize();
    327 }
    328 static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) {
    329   // All the fields except the last are i32, so this works beautifully.
    330   return unsigned(Field) * CharUnits::fromQuantity(4);
    331 }
    332 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
    333                                    CGOpenMPRuntime::IdentFieldIndex Field,
    334                                    const llvm::Twine &Name = "") {
    335   auto Offset = getOffsetOfIdentField(Field);
    336   return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
    337 }
    338 
    339 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
    340     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    341     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
    342   assert(ThreadIDVar->getType()->isPointerType() &&
    343          "thread id variable must be of type kmp_int32 *");
    344   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
    345   CodeGenFunction CGF(CGM, true);
    346   bool HasCancel = false;
    347   if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    348     HasCancel = OPD->hasCancel();
    349   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    350     HasCancel = OPSD->hasCancel();
    351   else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    352     HasCancel = OPFD->hasCancel();
    353   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
    354                                     HasCancel);
    355   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
    356   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
    357 }
    358 
    359 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
    360     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    361     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
    362   assert(!ThreadIDVar->getType()->isPointerType() &&
    363          "thread id variable must be of type kmp_int32 for tasks");
    364   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
    365   CodeGenFunction CGF(CGM, true);
    366   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
    367                                         InnermostKind,
    368                                         cast<OMPTaskDirective>(D).hasCancel());
    369   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
    370   return CGF.GenerateCapturedStmtFunction(*CS);
    371 }
    372 
    373 Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
    374   CharUnits Align = getIdentAlign(CGM);
    375   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
    376   if (!Entry) {
    377     if (!DefaultOpenMPPSource) {
    378       // Initialize default location for psource field of ident_t structure of
    379       // all ident_t objects. Format is ";file;function;line;column;;".
    380       // Taken from
    381       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
    382       DefaultOpenMPPSource =
    383           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
    384       DefaultOpenMPPSource =
    385           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    386     }
    387     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
    388         CGM.getModule(), IdentTy, /*isConstant*/ true,
    389         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
    390     DefaultOpenMPLocation->setUnnamedAddr(true);
    391     DefaultOpenMPLocation->setAlignment(Align.getQuantity());
    392 
    393     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
    394     llvm::Constant *Values[] = {Zero,
    395                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
    396                                 Zero, Zero, DefaultOpenMPPSource};
    397     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
    398     DefaultOpenMPLocation->setInitializer(Init);
    399     OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
    400   }
    401   return Address(Entry, Align);
    402 }
    403 
    404 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
    405                                                  SourceLocation Loc,
    406                                                  OpenMPLocationFlags Flags) {
    407   // If no debug info is generated - return global default location.
    408   if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
    409       Loc.isInvalid())
    410     return getOrCreateDefaultLocation(Flags).getPointer();
    411 
    412   assert(CGF.CurFn && "No function in current CodeGenFunction.");
    413 
    414   Address LocValue = Address::invalid();
    415   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
    416   if (I != OpenMPLocThreadIDMap.end())
    417     LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
    418 
    419   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
    420   // GetOpenMPThreadID was called before this routine.
    421   if (!LocValue.isValid()) {
    422     // Generate "ident_t .kmpc_loc.addr;"
    423     Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
    424                                       ".kmpc_loc.addr");
    425     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    426     Elem.second.DebugLoc = AI.getPointer();
    427     LocValue = AI;
    428 
    429     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    430     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
    431     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
    432                              CGM.getSize(getIdentSize(CGF.CGM)));
    433   }
    434 
    435   // char **psource = &.kmpc_loc_<flags>.addr.psource;
    436   Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
    437 
    438   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
    439   if (OMPDebugLoc == nullptr) {
    440     SmallString<128> Buffer2;
    441     llvm::raw_svector_ostream OS2(Buffer2);
    442     // Build debug location
    443     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    444     OS2 << ";" << PLoc.getFilename() << ";";
    445     if (const FunctionDecl *FD =
    446             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
    447       OS2 << FD->getQualifiedNameAsString();
    448     }
    449     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    450     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    451     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
    452   }
    453   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
    454   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
    455 
    456   // Our callers always pass this to a runtime function, so for
    457   // convenience, go ahead and return a naked pointer.
    458   return LocValue.getPointer();
    459 }
    460 
    461 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
    462                                           SourceLocation Loc) {
    463   assert(CGF.CurFn && "No function in current CodeGenFunction.");
    464 
    465   llvm::Value *ThreadID = nullptr;
    466   // Check whether we've already cached a load of the thread id in this
    467   // function.
    468   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
    469   if (I != OpenMPLocThreadIDMap.end()) {
    470     ThreadID = I->second.ThreadID;
    471     if (ThreadID != nullptr)
    472       return ThreadID;
    473   }
    474   if (auto OMPRegionInfo =
    475           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    476     if (OMPRegionInfo->getThreadIDVariable()) {
    477       // Check if this an outlined function with thread id passed as argument.
    478       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
    479       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
    480       // If value loaded in entry block, cache it and use it everywhere in
    481       // function.
    482       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    483         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    484         Elem.second.ThreadID = ThreadID;
    485       }
    486       return ThreadID;
    487     }
    488   }
    489 
    490   // This is not an outlined function region - need to call __kmpc_int32
    491   // kmpc_global_thread_num(ident_t *loc).
    492   // Generate thread id value and cache this value for use across the
    493   // function.
    494   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    495   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
    496   ThreadID =
    497       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
    498                           emitUpdateLocation(CGF, Loc));
    499   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    500   Elem.second.ThreadID = ThreadID;
    501   return ThreadID;
    502 }
    503 
    504 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
    505   assert(CGF.CurFn && "No function in current CodeGenFunction.");
    506   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
    507     OpenMPLocThreadIDMap.erase(CGF.CurFn);
    508 }
    509 
    510 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
    511   return llvm::PointerType::getUnqual(IdentTy);
    512 }
    513 
    514 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
    515   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
    516 }
    517 
    518 llvm::Constant *
    519 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
    520   llvm::Constant *RTLFn = nullptr;
    521   switch (Function) {
    522   case OMPRTL__kmpc_fork_call: {
    523     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
    524     // microtask, ...);
    525     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
    526                                 getKmpc_MicroPointerTy()};
    527     llvm::FunctionType *FnTy =
    528         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    529     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
    530     break;
    531   }
    532   case OMPRTL__kmpc_global_thread_num: {
    533     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
    534     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    535     llvm::FunctionType *FnTy =
    536         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    537     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
    538     break;
    539   }
    540   case OMPRTL__kmpc_threadprivate_cached: {
    541     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
    542     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
    543     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
    544                                 CGM.VoidPtrTy, CGM.SizeTy,
    545                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
    546     llvm::FunctionType *FnTy =
    547         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
    548     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
    549     break;
    550   }
    551   case OMPRTL__kmpc_critical: {
    552     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
    553     // kmp_critical_name *crit);
    554     llvm::Type *TypeParams[] = {
    555         getIdentTyPointerTy(), CGM.Int32Ty,
    556         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    557     llvm::FunctionType *FnTy =
    558         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    559     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
    560     break;
    561   }
    562   case OMPRTL__kmpc_critical_with_hint: {
    563     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
    564     // kmp_critical_name *crit, uintptr_t hint);
    565     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
    566                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
    567                                 CGM.IntPtrTy};
    568     llvm::FunctionType *FnTy =
    569         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    570     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
    571     break;
    572   }
    573   case OMPRTL__kmpc_threadprivate_register: {
    574     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
    575     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
    576     // typedef void *(*kmpc_ctor)(void *);
    577     auto KmpcCtorTy =
    578         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
    579                                 /*isVarArg*/ false)->getPointerTo();
    580     // typedef void *(*kmpc_cctor)(void *, void *);
    581     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    582     auto KmpcCopyCtorTy =
    583         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
    584                                 /*isVarArg*/ false)->getPointerTo();
    585     // typedef void (*kmpc_dtor)(void *);
    586     auto KmpcDtorTy =
    587         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
    588             ->getPointerTo();
    589     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
    590                               KmpcCopyCtorTy, KmpcDtorTy};
    591     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
    592                                         /*isVarArg*/ false);
    593     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
    594     break;
    595   }
    596   case OMPRTL__kmpc_end_critical: {
    597     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
    598     // kmp_critical_name *crit);
    599     llvm::Type *TypeParams[] = {
    600         getIdentTyPointerTy(), CGM.Int32Ty,
    601         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    602     llvm::FunctionType *FnTy =
    603         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    604     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
    605     break;
    606   }
    607   case OMPRTL__kmpc_cancel_barrier: {
    608     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
    609     // global_tid);
    610     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    611     llvm::FunctionType *FnTy =
    612         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    613     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
    614     break;
    615   }
    616   case OMPRTL__kmpc_barrier: {
    617     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
    618     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    619     llvm::FunctionType *FnTy =
    620         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    621     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
    622     break;
    623   }
    624   case OMPRTL__kmpc_for_static_fini: {
    625     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
    626     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    627     llvm::FunctionType *FnTy =
    628         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    629     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
    630     break;
    631   }
    632   case OMPRTL__kmpc_push_num_threads: {
    633     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
    634     // kmp_int32 num_threads)
    635     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
    636                                 CGM.Int32Ty};
    637     llvm::FunctionType *FnTy =
    638         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    639     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
    640     break;
    641   }
    642   case OMPRTL__kmpc_serialized_parallel: {
    643     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
    644     // global_tid);
    645     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    646     llvm::FunctionType *FnTy =
    647         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    648     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
    649     break;
    650   }
    651   case OMPRTL__kmpc_end_serialized_parallel: {
    652     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
    653     // global_tid);
    654     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    655     llvm::FunctionType *FnTy =
    656         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    657     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
    658     break;
    659   }
    660   case OMPRTL__kmpc_flush: {
    661     // Build void __kmpc_flush(ident_t *loc);
    662     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    663     llvm::FunctionType *FnTy =
    664         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    665     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
    666     break;
    667   }
    668   case OMPRTL__kmpc_master: {
    669     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
    670     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    671     llvm::FunctionType *FnTy =
    672         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    673     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
    674     break;
    675   }
    676   case OMPRTL__kmpc_end_master: {
    677     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
    678     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    679     llvm::FunctionType *FnTy =
    680         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    681     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
    682     break;
    683   }
    684   case OMPRTL__kmpc_omp_taskyield: {
    685     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
    686     // int end_part);
    687     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    688     llvm::FunctionType *FnTy =
    689         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    690     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
    691     break;
    692   }
    693   case OMPRTL__kmpc_single: {
    694     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
    695     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    696     llvm::FunctionType *FnTy =
    697         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    698     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
    699     break;
    700   }
    701   case OMPRTL__kmpc_end_single: {
    702     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
    703     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    704     llvm::FunctionType *FnTy =
    705         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    706     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
    707     break;
    708   }
    709   case OMPRTL__kmpc_omp_task_alloc: {
    710     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
    711     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
    712     // kmp_routine_entry_t *task_entry);
    713     assert(KmpRoutineEntryPtrTy != nullptr &&
    714            "Type kmp_routine_entry_t must be created.");
    715     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
    716                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
    717     // Return void * and then cast to particular kmp_task_t type.
    718     llvm::FunctionType *FnTy =
    719         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    720     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
    721     break;
    722   }
    723   case OMPRTL__kmpc_omp_task: {
    724     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
    725     // *new_task);
    726     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
    727                                 CGM.VoidPtrTy};
    728     llvm::FunctionType *FnTy =
    729         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    730     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
    731     break;
    732   }
    733   case OMPRTL__kmpc_copyprivate: {
    734     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
    735     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
    736     // kmp_int32 didit);
    737     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    738     auto *CpyFnTy =
    739         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
    740     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
    741                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
    742                                 CGM.Int32Ty};
    743     llvm::FunctionType *FnTy =
    744         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    745     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
    746     break;
    747   }
    748   case OMPRTL__kmpc_reduce: {
    749     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
    750     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
    751     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
    752     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    753     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
    754                                                /*isVarArg=*/false);
    755     llvm::Type *TypeParams[] = {
    756         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
    757         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
    758         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    759     llvm::FunctionType *FnTy =
    760         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    761     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
    762     break;
    763   }
    764   case OMPRTL__kmpc_reduce_nowait: {
    765     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
    766     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
    767     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
    768     // *lck);
    769     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    770     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
    771                                                /*isVarArg=*/false);
    772     llvm::Type *TypeParams[] = {
    773         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
    774         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
    775         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    776     llvm::FunctionType *FnTy =
    777         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    778     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
    779     break;
    780   }
    781   case OMPRTL__kmpc_end_reduce: {
    782     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
    783     // kmp_critical_name *lck);
    784     llvm::Type *TypeParams[] = {
    785         getIdentTyPointerTy(), CGM.Int32Ty,
    786         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    787     llvm::FunctionType *FnTy =
    788         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    789     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
    790     break;
    791   }
    792   case OMPRTL__kmpc_end_reduce_nowait: {
    793     // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    794     // kmp_critical_name *lck);
    795     llvm::Type *TypeParams[] = {
    796         getIdentTyPointerTy(), CGM.Int32Ty,
    797         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    798     llvm::FunctionType *FnTy =
    799         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    800     RTLFn =
    801         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
    802     break;
    803   }
    804   case OMPRTL__kmpc_omp_task_begin_if0: {
    805     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    806     // kmp_task_t *new_task);
    807     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
    808                                 CGM.VoidPtrTy};
    809     llvm::FunctionType *FnTy =
    810         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    811     RTLFn =
    812         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
    813     break;
    814   }
    815   case OMPRTL__kmpc_omp_task_complete_if0: {
    816     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    817     // kmp_task_t *new_task);
    818     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
    819                                 CGM.VoidPtrTy};
    820     llvm::FunctionType *FnTy =
    821         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    822     RTLFn = CGM.CreateRuntimeFunction(FnTy,
    823                                       /*Name=*/"__kmpc_omp_task_complete_if0");
    824     break;
    825   }
    826   case OMPRTL__kmpc_ordered: {
    827     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
    828     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    829     llvm::FunctionType *FnTy =
    830         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    831     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
    832     break;
    833   }
    834   case OMPRTL__kmpc_end_ordered: {
    835     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
    836     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    837     llvm::FunctionType *FnTy =
    838         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    839     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
    840     break;
    841   }
    842   case OMPRTL__kmpc_omp_taskwait: {
    843     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
    844     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    845     llvm::FunctionType *FnTy =
    846         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    847     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
    848     break;
    849   }
    850   case OMPRTL__kmpc_taskgroup: {
    851     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
    852     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    853     llvm::FunctionType *FnTy =
    854         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    855     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
    856     break;
    857   }
    858   case OMPRTL__kmpc_end_taskgroup: {
    859     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
    860     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    861     llvm::FunctionType *FnTy =
    862         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    863     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
    864     break;
    865   }
    866   case OMPRTL__kmpc_push_proc_bind: {
    867     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
    868     // int proc_bind)
    869     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    870     llvm::FunctionType *FnTy =
    871         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    872     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
    873     break;
    874   }
    875   case OMPRTL__kmpc_omp_task_with_deps: {
    876     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
    877     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
    878     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
    879     llvm::Type *TypeParams[] = {
    880         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
    881         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
    882     llvm::FunctionType *FnTy =
    883         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    884     RTLFn =
    885         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
    886     break;
    887   }
    888   case OMPRTL__kmpc_omp_wait_deps: {
    889     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    890     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
    891     // kmp_depend_info_t *noalias_dep_list);
    892     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
    893                                 CGM.Int32Ty,           CGM.VoidPtrTy,
    894                                 CGM.Int32Ty,           CGM.VoidPtrTy};
    895     llvm::FunctionType *FnTy =
    896         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    897     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
    898     break;
    899   }
    900   case OMPRTL__kmpc_cancellationpoint: {
    901     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
    902     // global_tid, kmp_int32 cncl_kind)
    903     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    904     llvm::FunctionType *FnTy =
    905         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    906     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
    907     break;
    908   }
    909   case OMPRTL__kmpc_cancel: {
    910     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
    911     // kmp_int32 cncl_kind)
    912     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    913     llvm::FunctionType *FnTy =
    914         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    915     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
    916     break;
    917   }
    918   case OMPRTL__tgt_target: {
    919     // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
    920     // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
    921     // *arg_types);
    922     llvm::Type *TypeParams[] = {CGM.Int32Ty,
    923                                 CGM.VoidPtrTy,
    924                                 CGM.Int32Ty,
    925                                 CGM.VoidPtrPtrTy,
    926                                 CGM.VoidPtrPtrTy,
    927                                 CGM.SizeTy->getPointerTo(),
    928                                 CGM.Int32Ty->getPointerTo()};
    929     llvm::FunctionType *FnTy =
    930         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    931     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
    932     break;
    933   }
    934   }
    935   return RTLFn;
    936 }
    937 
    938 static llvm::Value *getTypeSize(CodeGenFunction &CGF, QualType Ty) {
    939   auto &C = CGF.getContext();
    940   llvm::Value *Size = nullptr;
    941   auto SizeInChars = C.getTypeSizeInChars(Ty);
    942   if (SizeInChars.isZero()) {
    943     // getTypeSizeInChars() returns 0 for a VLA.
    944     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
    945       llvm::Value *ArraySize;
    946       std::tie(ArraySize, Ty) = CGF.getVLASize(VAT);
    947       Size = Size ? CGF.Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    948     }
    949     SizeInChars = C.getTypeSizeInChars(Ty);
    950     assert(!SizeInChars.isZero());
    951     Size = CGF.Builder.CreateNUWMul(
    952         Size, llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity()));
    953   } else
    954     Size = llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity());
    955   return Size;
    956 }
    957 
    958 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
    959                                                              bool IVSigned) {
    960   assert((IVSize == 32 || IVSize == 64) &&
    961          "IV size is not compatible with the omp runtime");
    962   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
    963                                        : "__kmpc_for_static_init_4u")
    964                            : (IVSigned ? "__kmpc_for_static_init_8"
    965                                        : "__kmpc_for_static_init_8u");
    966   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
    967   auto PtrTy = llvm::PointerType::getUnqual(ITy);
    968   llvm::Type *TypeParams[] = {
    969     getIdentTyPointerTy(),                     // loc
    970     CGM.Int32Ty,                               // tid
    971     CGM.Int32Ty,                               // schedtype
    972     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    973     PtrTy,                                     // p_lower
    974     PtrTy,                                     // p_upper
    975     PtrTy,                                     // p_stride
    976     ITy,                                       // incr
    977     ITy                                        // chunk
    978   };
    979   llvm::FunctionType *FnTy =
    980       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    981   return CGM.CreateRuntimeFunction(FnTy, Name);
    982 }
    983 
    984 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
    985                                                             bool IVSigned) {
    986   assert((IVSize == 32 || IVSize == 64) &&
    987          "IV size is not compatible with the omp runtime");
    988   auto Name =
    989       IVSize == 32
    990           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
    991           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
    992   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
    993   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
    994                                CGM.Int32Ty,           // tid
    995                                CGM.Int32Ty,           // schedtype
    996                                ITy,                   // lower
    997                                ITy,                   // upper
    998                                ITy,                   // stride
    999                                ITy                    // chunk
   1000   };
   1001   llvm::FunctionType *FnTy =
   1002       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
   1003   return CGM.CreateRuntimeFunction(FnTy, Name);
   1004 }
   1005 
   1006 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
   1007                                                             bool IVSigned) {
   1008   assert((IVSize == 32 || IVSize == 64) &&
   1009          "IV size is not compatible with the omp runtime");
   1010   auto Name =
   1011       IVSize == 32
   1012           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
   1013           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
   1014   llvm::Type *TypeParams[] = {
   1015       getIdentTyPointerTy(), // loc
   1016       CGM.Int32Ty,           // tid
   1017   };
   1018   llvm::FunctionType *FnTy =
   1019       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
   1020   return CGM.CreateRuntimeFunction(FnTy, Name);
   1021 }
   1022 
   1023 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
   1024                                                             bool IVSigned) {
   1025   assert((IVSize == 32 || IVSize == 64) &&
   1026          "IV size is not compatible with the omp runtime");
   1027   auto Name =
   1028       IVSize == 32
   1029           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
   1030           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
   1031   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
   1032   auto PtrTy = llvm::PointerType::getUnqual(ITy);
   1033   llvm::Type *TypeParams[] = {
   1034     getIdentTyPointerTy(),                     // loc
   1035     CGM.Int32Ty,                               // tid
   1036     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
   1037     PtrTy,                                     // p_lower
   1038     PtrTy,                                     // p_upper
   1039     PtrTy                                      // p_stride
   1040   };
   1041   llvm::FunctionType *FnTy =
   1042       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
   1043   return CGM.CreateRuntimeFunction(FnTy, Name);
   1044 }
   1045 
   1046 llvm::Constant *
   1047 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
   1048   assert(!CGM.getLangOpts().OpenMPUseTLS ||
   1049          !CGM.getContext().getTargetInfo().isTLSSupported());
   1050   // Lookup the entry, lazily creating it if necessary.
   1051   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
   1052                                      Twine(CGM.getMangledName(VD)) + ".cache.");
   1053 }
   1054 
   1055 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
   1056                                                 const VarDecl *VD,
   1057                                                 Address VDAddr,
   1058                                                 SourceLocation Loc) {
   1059   if (CGM.getLangOpts().OpenMPUseTLS &&
   1060       CGM.getContext().getTargetInfo().isTLSSupported())
   1061     return VDAddr;
   1062 
   1063   auto VarTy = VDAddr.getElementType();
   1064   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
   1065                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
   1066                                                        CGM.Int8PtrTy),
   1067                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
   1068                          getOrCreateThreadPrivateCache(VD)};
   1069   return Address(CGF.EmitRuntimeCall(
   1070       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
   1071                  VDAddr.getAlignment());
   1072 }
   1073 
   1074 void CGOpenMPRuntime::emitThreadPrivateVarInit(
   1075     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
   1076     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
   1077   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
   1078   // library.
   1079   auto OMPLoc = emitUpdateLocation(CGF, Loc);
   1080   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
   1081                       OMPLoc);
   1082   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
   1083   // to register constructor/destructor for variable.
   1084   llvm::Value *Args[] = {OMPLoc,
   1085                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
   1086                                                        CGM.VoidPtrTy),
   1087                          Ctor, CopyCtor, Dtor};
   1088   CGF.EmitRuntimeCall(
   1089       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
   1090 }
   1091 
   1092 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
   1093     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
   1094     bool PerformInit, CodeGenFunction *CGF) {
   1095   if (CGM.getLangOpts().OpenMPUseTLS &&
   1096       CGM.getContext().getTargetInfo().isTLSSupported())
   1097     return nullptr;
   1098 
   1099   VD = VD->getDefinition(CGM.getContext());
   1100   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
   1101     ThreadPrivateWithDefinition.insert(VD);
   1102     QualType ASTTy = VD->getType();
   1103 
   1104     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
   1105     auto Init = VD->getAnyInitializer();
   1106     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
   1107       // Generate function that re-emits the declaration's initializer into the
   1108       // threadprivate copy of the variable VD
   1109       CodeGenFunction CtorCGF(CGM);
   1110       FunctionArgList Args;
   1111       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
   1112                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
   1113       Args.push_back(&Dst);
   1114 
   1115       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
   1116           CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
   1117           /*isVariadic=*/false);
   1118       auto FTy = CGM.getTypes().GetFunctionType(FI);
   1119       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
   1120           FTy, ".__kmpc_global_ctor_.", FI, Loc);
   1121       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
   1122                             Args, SourceLocation());
   1123       auto ArgVal = CtorCGF.EmitLoadOfScalar(
   1124           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
   1125           CGM.getContext().VoidPtrTy, Dst.getLocation());
   1126       Address Arg = Address(ArgVal, VDAddr.getAlignment());
   1127       Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
   1128                                              CtorCGF.ConvertTypeForMem(ASTTy));
   1129       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
   1130                                /*IsInitializer=*/true);
   1131       ArgVal = CtorCGF.EmitLoadOfScalar(
   1132           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
   1133           CGM.getContext().VoidPtrTy, Dst.getLocation());
   1134       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
   1135       CtorCGF.FinishFunction();
   1136       Ctor = Fn;
   1137     }
   1138     if (VD->getType().isDestructedType() != QualType::DK_none) {
   1139       // Generate function that emits destructor call for the threadprivate copy
   1140       // of the variable VD
   1141       CodeGenFunction DtorCGF(CGM);
   1142       FunctionArgList Args;
   1143       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
   1144                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
   1145       Args.push_back(&Dst);
   1146 
   1147       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
   1148           CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
   1149           /*isVariadic=*/false);
   1150       auto FTy = CGM.getTypes().GetFunctionType(FI);
   1151       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
   1152           FTy, ".__kmpc_global_dtor_.", FI, Loc);
   1153       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
   1154                             SourceLocation());
   1155       auto ArgVal = DtorCGF.EmitLoadOfScalar(
   1156           DtorCGF.GetAddrOfLocalVar(&Dst),
   1157           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
   1158       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
   1159                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
   1160                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
   1161       DtorCGF.FinishFunction();
   1162       Dtor = Fn;
   1163     }
   1164     // Do not emit init function if it is not required.
   1165     if (!Ctor && !Dtor)
   1166       return nullptr;
   1167 
   1168     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
   1169     auto CopyCtorTy =
   1170         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
   1171                                 /*isVarArg=*/false)->getPointerTo();
   1172     // Copying constructor for the threadprivate variable.
   1173     // Must be NULL - reserved by runtime, but currently it requires that this
   1174     // parameter is always NULL. Otherwise it fires assertion.
   1175     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
   1176     if (Ctor == nullptr) {
   1177       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
   1178                                             /*isVarArg=*/false)->getPointerTo();
   1179       Ctor = llvm::Constant::getNullValue(CtorTy);
   1180     }
   1181     if (Dtor == nullptr) {
   1182       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
   1183                                             /*isVarArg=*/false)->getPointerTo();
   1184       Dtor = llvm::Constant::getNullValue(DtorTy);
   1185     }
   1186     if (!CGF) {
   1187       auto InitFunctionTy =
   1188           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
   1189       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
   1190           InitFunctionTy, ".__omp_threadprivate_init_.",
   1191           CGM.getTypes().arrangeNullaryFunction());
   1192       CodeGenFunction InitCGF(CGM);
   1193       FunctionArgList ArgList;
   1194       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
   1195                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
   1196                             Loc);
   1197       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
   1198       InitCGF.FinishFunction();
   1199       return InitFunction;
   1200     }
   1201     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
   1202   }
   1203   return nullptr;
   1204 }
   1205 
   1206 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
   1207 /// function. Here is the logic:
   1208 /// if (Cond) {
   1209 ///   ThenGen();
   1210 /// } else {
   1211 ///   ElseGen();
   1212 /// }
   1213 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
   1214                             const RegionCodeGenTy &ThenGen,
   1215                             const RegionCodeGenTy &ElseGen) {
   1216   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
   1217 
   1218   // If the condition constant folds and can be elided, try to avoid emitting
   1219   // the condition and the dead arm of the if/else.
   1220   bool CondConstant;
   1221   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
   1222     CodeGenFunction::RunCleanupsScope Scope(CGF);
   1223     if (CondConstant) {
   1224       ThenGen(CGF);
   1225     } else {
   1226       ElseGen(CGF);
   1227     }
   1228     return;
   1229   }
   1230 
   1231   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
   1232   // emit the conditional branch.
   1233   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
   1234   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
   1235   auto ContBlock = CGF.createBasicBlock("omp_if.end");
   1236   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
   1237 
   1238   // Emit the 'then' code.
   1239   CGF.EmitBlock(ThenBlock);
   1240   {
   1241     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
   1242     ThenGen(CGF);
   1243   }
   1244   CGF.EmitBranch(ContBlock);
   1245   // Emit the 'else' code if present.
   1246   {
   1247     // There is no need to emit line number for unconditional branch.
   1248     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
   1249     CGF.EmitBlock(ElseBlock);
   1250   }
   1251   {
   1252     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
   1253     ElseGen(CGF);
   1254   }
   1255   {
   1256     // There is no need to emit line number for unconditional branch.
   1257     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
   1258     CGF.EmitBranch(ContBlock);
   1259   }
   1260   // Emit the continuation block for code after the if.
   1261   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
   1262 }
   1263 
   1264 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
   1265                                        llvm::Value *OutlinedFn,
   1266                                        ArrayRef<llvm::Value *> CapturedVars,
   1267                                        const Expr *IfCond) {
   1268   if (!CGF.HaveInsertPoint())
   1269     return;
   1270   auto *RTLoc = emitUpdateLocation(CGF, Loc);
   1271   auto &&ThenGen = [this, OutlinedFn, CapturedVars,
   1272                     RTLoc](CodeGenFunction &CGF) {
   1273     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
   1274     llvm::Value *Args[] = {
   1275         RTLoc,
   1276         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
   1277         CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
   1278     llvm::SmallVector<llvm::Value *, 16> RealArgs;
   1279     RealArgs.append(std::begin(Args), std::end(Args));
   1280     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
   1281 
   1282     auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
   1283     CGF.EmitRuntimeCall(RTLFn, RealArgs);
   1284   };
   1285   auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc,
   1286                     Loc](CodeGenFunction &CGF) {
   1287     auto ThreadID = getThreadID(CGF, Loc);
   1288     // Build calls:
   1289     // __kmpc_serialized_parallel(&Loc, GTid);
   1290     llvm::Value *Args[] = {RTLoc, ThreadID};
   1291     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
   1292                         Args);
   1293 
   1294     // OutlinedFn(&GTid, &zero, CapturedStruct);
   1295     auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
   1296     Address ZeroAddr =
   1297       CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
   1298                            /*Name*/ ".zero.addr");
   1299     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
   1300     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
   1301     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
   1302     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
   1303     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
   1304     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
   1305 
   1306     // __kmpc_end_serialized_parallel(&Loc, GTid);
   1307     llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
   1308     CGF.EmitRuntimeCall(
   1309         createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
   1310   };
   1311   if (IfCond) {
   1312     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
   1313   } else {
   1314     CodeGenFunction::RunCleanupsScope Scope(CGF);
   1315     ThenGen(CGF);
   1316   }
   1317 }
   1318 
   1319 // If we're inside an (outlined) parallel region, use the region info's
   1320 // thread-ID variable (it is passed in a first argument of the outlined function
   1321 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
   1322 // regular serial code region, get thread ID by calling kmp_int32
   1323 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
   1324 // return the address of that temp.
   1325 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
   1326                                              SourceLocation Loc) {
   1327   if (auto OMPRegionInfo =
   1328           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
   1329     if (OMPRegionInfo->getThreadIDVariable())
   1330       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
   1331 
   1332   auto ThreadID = getThreadID(CGF, Loc);
   1333   auto Int32Ty =
   1334       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
   1335   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
   1336   CGF.EmitStoreOfScalar(ThreadID,
   1337                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
   1338 
   1339   return ThreadIDTemp;
   1340 }
   1341 
   1342 llvm::Constant *
   1343 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
   1344                                              const llvm::Twine &Name) {
   1345   SmallString<256> Buffer;
   1346   llvm::raw_svector_ostream Out(Buffer);
   1347   Out << Name;
   1348   auto RuntimeName = Out.str();
   1349   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
   1350   if (Elem.second) {
   1351     assert(Elem.second->getType()->getPointerElementType() == Ty &&
   1352            "OMP internal variable has different type than requested");
   1353     return &*Elem.second;
   1354   }
   1355 
   1356   return Elem.second = new llvm::GlobalVariable(
   1357              CGM.getModule(), Ty, /*IsConstant*/ false,
   1358              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
   1359              Elem.first());
   1360 }
   1361 
   1362 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
   1363   llvm::Twine Name(".gomp_critical_user_", CriticalName);
   1364   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
   1365 }
   1366 
   1367 namespace {
   1368 template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup {
   1369   llvm::Value *Callee;
   1370   llvm::Value *Args[N];
   1371 
   1372 public:
   1373   CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs)
   1374       : Callee(Callee) {
   1375     assert(CleanupArgs.size() == N);
   1376     std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args));
   1377   }
   1378   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
   1379     if (!CGF.HaveInsertPoint())
   1380       return;
   1381     CGF.EmitRuntimeCall(Callee, Args);
   1382   }
   1383 };
   1384 } // anonymous namespace
   1385 
   1386 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
   1387                                          StringRef CriticalName,
   1388                                          const RegionCodeGenTy &CriticalOpGen,
   1389                                          SourceLocation Loc, const Expr *Hint) {
   1390   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
   1391   // CriticalOpGen();
   1392   // __kmpc_end_critical(ident_t *, gtid, Lock);
   1393   // Prepare arguments and build a call to __kmpc_critical
   1394   if (!CGF.HaveInsertPoint())
   1395     return;
   1396   CodeGenFunction::RunCleanupsScope Scope(CGF);
   1397   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
   1398                          getCriticalRegionLock(CriticalName)};
   1399   if (Hint) {
   1400     llvm::SmallVector<llvm::Value *, 8> ArgsWithHint(std::begin(Args),
   1401                                                      std::end(Args));
   1402     auto *HintVal = CGF.EmitScalarExpr(Hint);
   1403     ArgsWithHint.push_back(
   1404         CGF.Builder.CreateIntCast(HintVal, CGM.IntPtrTy, /*isSigned=*/false));
   1405     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical_with_hint),
   1406                         ArgsWithHint);
   1407   } else
   1408     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
   1409   // Build a call to __kmpc_end_critical
   1410   CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
   1411       NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
   1412       llvm::makeArrayRef(Args));
   1413   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
   1414 }
   1415 
   1416 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
   1417                        OpenMPDirectiveKind Kind, SourceLocation Loc,
   1418                        const RegionCodeGenTy &BodyOpGen) {
   1419   llvm::Value *CallBool = CGF.EmitScalarConversion(
   1420       IfCond,
   1421       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
   1422       CGF.getContext().BoolTy, Loc);
   1423 
   1424   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
   1425   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
   1426   // Generate the branch (If-stmt)
   1427   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
   1428   CGF.EmitBlock(ThenBlock);
   1429   CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen);
   1430   // Emit the rest of bblocks/branches
   1431   CGF.EmitBranch(ContBlock);
   1432   CGF.EmitBlock(ContBlock, true);
   1433 }
   1434 
   1435 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
   1436                                        const RegionCodeGenTy &MasterOpGen,
   1437                                        SourceLocation Loc) {
   1438   if (!CGF.HaveInsertPoint())
   1439     return;
   1440   // if(__kmpc_master(ident_t *, gtid)) {
   1441   //   MasterOpGen();
   1442   //   __kmpc_end_master(ident_t *, gtid);
   1443   // }
   1444   // Prepare arguments and build a call to __kmpc_master
   1445   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
   1446   auto *IsMaster =
   1447       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
   1448   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
   1449       MasterCallEndCleanup;
   1450   emitIfStmt(
   1451       CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void {
   1452         CodeGenFunction::RunCleanupsScope Scope(CGF);
   1453         CGF.EHStack.pushCleanup<MasterCallEndCleanup>(
   1454             NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
   1455             llvm::makeArrayRef(Args));
   1456         MasterOpGen(CGF);
   1457       });
   1458 }
   1459 
   1460 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
   1461                                         SourceLocation Loc) {
   1462   if (!CGF.HaveInsertPoint())
   1463     return;
   1464   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
   1465   llvm::Value *Args[] = {
   1466       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
   1467       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
   1468   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
   1469 }
   1470 
   1471 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
   1472                                           const RegionCodeGenTy &TaskgroupOpGen,
   1473                                           SourceLocation Loc) {
   1474   if (!CGF.HaveInsertPoint())
   1475     return;
   1476   // __kmpc_taskgroup(ident_t *, gtid);
   1477   // TaskgroupOpGen();
   1478   // __kmpc_end_taskgroup(ident_t *, gtid);
   1479   // Prepare arguments and build a call to __kmpc_taskgroup
   1480   {
   1481     CodeGenFunction::RunCleanupsScope Scope(CGF);
   1482     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
   1483     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args);
   1484     // Build a call to __kmpc_end_taskgroup
   1485     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
   1486         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
   1487         llvm::makeArrayRef(Args));
   1488     emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
   1489   }
   1490 }
   1491 
   1492 /// Given an array of pointers to variables, project the address of a
   1493 /// given variable.
   1494 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
   1495                                       unsigned Index, const VarDecl *Var) {
   1496   // Pull out the pointer to the variable.
   1497   Address PtrAddr =
   1498       CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
   1499   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
   1500 
   1501   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
   1502   Addr = CGF.Builder.CreateElementBitCast(
   1503       Addr, CGF.ConvertTypeForMem(Var->getType()));
   1504   return Addr;
   1505 }
   1506 
   1507 static llvm::Value *emitCopyprivateCopyFunction(
   1508     CodeGenModule &CGM, llvm::Type *ArgsType,
   1509     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
   1510     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
   1511   auto &C = CGM.getContext();
   1512   // void copy_func(void *LHSArg, void *RHSArg);
   1513   FunctionArgList Args;
   1514   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
   1515                            C.VoidPtrTy);
   1516   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
   1517                            C.VoidPtrTy);
   1518   Args.push_back(&LHSArg);
   1519   Args.push_back(&RHSArg);
   1520   FunctionType::ExtInfo EI;
   1521   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
   1522       C.VoidTy, Args, EI, /*isVariadic=*/false);
   1523   auto *Fn = llvm::Function::Create(
   1524       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
   1525       ".omp.copyprivate.copy_func", &CGM.getModule());
   1526   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
   1527   CodeGenFunction CGF(CGM);
   1528   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
   1529   // Dest = (void*[n])(LHSArg);
   1530   // Src = (void*[n])(RHSArg);
   1531   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   1532       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
   1533       ArgsType), CGF.getPointerAlign());
   1534   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   1535       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
   1536       ArgsType), CGF.getPointerAlign());
   1537   // *(Type0*)Dst[0] = *(Type0*)Src[0];
   1538   // *(Type1*)Dst[1] = *(Type1*)Src[1];
   1539   // ...
   1540   // *(Typen*)Dst[n] = *(Typen*)Src[n];
   1541   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
   1542     auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
   1543     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
   1544 
   1545     auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
   1546     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
   1547 
   1548     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
   1549     QualType Type = VD->getType();
   1550     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
   1551   }
   1552   CGF.FinishFunction();
   1553   return Fn;
   1554 }
   1555 
   1556 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
   1557                                        const RegionCodeGenTy &SingleOpGen,
   1558                                        SourceLocation Loc,
   1559                                        ArrayRef<const Expr *> CopyprivateVars,
   1560                                        ArrayRef<const Expr *> SrcExprs,
   1561                                        ArrayRef<const Expr *> DstExprs,
   1562                                        ArrayRef<const Expr *> AssignmentOps) {
   1563   if (!CGF.HaveInsertPoint())
   1564     return;
   1565   assert(CopyprivateVars.size() == SrcExprs.size() &&
   1566          CopyprivateVars.size() == DstExprs.size() &&
   1567          CopyprivateVars.size() == AssignmentOps.size());
   1568   auto &C = CGM.getContext();
   1569   // int32 did_it = 0;
   1570   // if(__kmpc_single(ident_t *, gtid)) {
   1571   //   SingleOpGen();
   1572   //   __kmpc_end_single(ident_t *, gtid);
   1573   //   did_it = 1;
   1574   // }
   1575   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
   1576   // <copy_func>, did_it);
   1577 
   1578   Address DidIt = Address::invalid();
   1579   if (!CopyprivateVars.empty()) {
   1580     // int32 did_it = 0;
   1581     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
   1582     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
   1583     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
   1584   }
   1585   // Prepare arguments and build a call to __kmpc_single
   1586   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
   1587   auto *IsSingle =
   1588       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
   1589   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
   1590       SingleCallEndCleanup;
   1591   emitIfStmt(
   1592       CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void {
   1593         CodeGenFunction::RunCleanupsScope Scope(CGF);
   1594         CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
   1595             NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
   1596             llvm::makeArrayRef(Args));
   1597         SingleOpGen(CGF);
   1598         if (DidIt.isValid()) {
   1599           // did_it = 1;
   1600           CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
   1601         }
   1602       });
   1603   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
   1604   // <copy_func>, did_it);
   1605   if (DidIt.isValid()) {
   1606     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
   1607     auto CopyprivateArrayTy =
   1608         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
   1609                                /*IndexTypeQuals=*/0);
   1610     // Create a list of all private variables for copyprivate.
   1611     Address CopyprivateList =
   1612         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
   1613     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
   1614       Address Elem = CGF.Builder.CreateConstArrayGEP(
   1615           CopyprivateList, I, CGF.getPointerSize());
   1616       CGF.Builder.CreateStore(
   1617           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   1618               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
   1619           Elem);
   1620     }
   1621     // Build function that copies private values from single region to all other
   1622     // threads in the corresponding parallel region.
   1623     auto *CpyFn = emitCopyprivateCopyFunction(
   1624         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
   1625         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
   1626     auto *BufSize = getTypeSize(CGF, CopyprivateArrayTy);
   1627     Address CL =
   1628       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
   1629                                                       CGF.VoidPtrTy);
   1630     auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
   1631     llvm::Value *Args[] = {
   1632         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
   1633         getThreadID(CGF, Loc),        // i32 <gtid>
   1634         BufSize,                      // size_t <buf_size>
   1635         CL.getPointer(),              // void *<copyprivate list>
   1636         CpyFn,                        // void (*) (void *, void *) <copy_func>
   1637         DidItVal                      // i32 did_it
   1638     };
   1639     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
   1640   }
   1641 }
   1642 
   1643 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
   1644                                         const RegionCodeGenTy &OrderedOpGen,
   1645                                         SourceLocation Loc, bool IsThreads) {
   1646   if (!CGF.HaveInsertPoint())
   1647     return;
   1648   // __kmpc_ordered(ident_t *, gtid);
   1649   // OrderedOpGen();
   1650   // __kmpc_end_ordered(ident_t *, gtid);
   1651   // Prepare arguments and build a call to __kmpc_ordered
   1652   CodeGenFunction::RunCleanupsScope Scope(CGF);
   1653   if (IsThreads) {
   1654     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
   1655     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
   1656     // Build a call to __kmpc_end_ordered
   1657     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
   1658         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
   1659         llvm::makeArrayRef(Args));
   1660   }
   1661   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
   1662 }
   1663 
   1664 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
   1665                                       OpenMPDirectiveKind Kind, bool EmitChecks,
   1666                                       bool ForceSimpleCall) {
   1667   if (!CGF.HaveInsertPoint())
   1668     return;
   1669   // Build call __kmpc_cancel_barrier(loc, thread_id);
   1670   // Build call __kmpc_barrier(loc, thread_id);
   1671   OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
   1672   if (Kind == OMPD_for) {
   1673     Flags =
   1674         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
   1675   } else if (Kind == OMPD_sections) {
   1676     Flags = static_cast<OpenMPLocationFlags>(Flags |
   1677                                              OMP_IDENT_BARRIER_IMPL_SECTIONS);
   1678   } else if (Kind == OMPD_single) {
   1679     Flags =
   1680         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
   1681   } else if (Kind == OMPD_barrier) {
   1682     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL);
   1683   } else {
   1684     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
   1685   }
   1686   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
   1687   // thread_id);
   1688   auto *OMPRegionInfo =
   1689       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
   1690   // Do not emit barrier call in the single directive emitted in some rare cases
   1691   // for sections directives.
   1692   if (OMPRegionInfo && OMPRegionInfo->getDirectiveKind() == OMPD_single)
   1693     return;
   1694   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
   1695                          getThreadID(CGF, Loc)};
   1696   if (OMPRegionInfo) {
   1697     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
   1698       auto *Result = CGF.EmitRuntimeCall(
   1699           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
   1700       if (EmitChecks) {
   1701         // if (__kmpc_cancel_barrier()) {
   1702         //   exit from construct;
   1703         // }
   1704         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
   1705         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
   1706         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
   1707         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
   1708         CGF.EmitBlock(ExitBB);
   1709         //   exit from construct;
   1710         auto CancelDestination =
   1711             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
   1712         CGF.EmitBranchThroughCleanup(CancelDestination);
   1713         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
   1714       }
   1715       return;
   1716     }
   1717   }
   1718   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
   1719 }
   1720 
   1721 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from
   1722 /// the enum sched_type in kmp.h).
   1723 enum OpenMPSchedType {
   1724   /// \brief Lower bound for default (unordered) versions.
   1725   OMP_sch_lower = 32,
   1726   OMP_sch_static_chunked = 33,
   1727   OMP_sch_static = 34,
   1728   OMP_sch_dynamic_chunked = 35,
   1729   OMP_sch_guided_chunked = 36,
   1730   OMP_sch_runtime = 37,
   1731   OMP_sch_auto = 38,
   1732   /// \brief Lower bound for 'ordered' versions.
   1733   OMP_ord_lower = 64,
   1734   OMP_ord_static_chunked = 65,
   1735   OMP_ord_static = 66,
   1736   OMP_ord_dynamic_chunked = 67,
   1737   OMP_ord_guided_chunked = 68,
   1738   OMP_ord_runtime = 69,
   1739   OMP_ord_auto = 70,
   1740   OMP_sch_default = OMP_sch_static,
   1741 };
   1742 
   1743 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
   1744 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
   1745                                           bool Chunked, bool Ordered) {
   1746   switch (ScheduleKind) {
   1747   case OMPC_SCHEDULE_static:
   1748     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
   1749                    : (Ordered ? OMP_ord_static : OMP_sch_static);
   1750   case OMPC_SCHEDULE_dynamic:
   1751     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
   1752   case OMPC_SCHEDULE_guided:
   1753     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
   1754   case OMPC_SCHEDULE_runtime:
   1755     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
   1756   case OMPC_SCHEDULE_auto:
   1757     return Ordered ? OMP_ord_auto : OMP_sch_auto;
   1758   case OMPC_SCHEDULE_unknown:
   1759     assert(!Chunked && "chunk was specified but schedule kind not known");
   1760     return Ordered ? OMP_ord_static : OMP_sch_static;
   1761   }
   1762   llvm_unreachable("Unexpected runtime schedule");
   1763 }
   1764 
   1765 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
   1766                                          bool Chunked) const {
   1767   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
   1768   return Schedule == OMP_sch_static;
   1769 }
   1770 
   1771 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
   1772   auto Schedule =
   1773       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
   1774   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
   1775   return Schedule != OMP_sch_static;
   1776 }
   1777 
   1778 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
   1779                                           SourceLocation Loc,
   1780                                           OpenMPScheduleClauseKind ScheduleKind,
   1781                                           unsigned IVSize, bool IVSigned,
   1782                                           bool Ordered, llvm::Value *UB,
   1783                                           llvm::Value *Chunk) {
   1784   if (!CGF.HaveInsertPoint())
   1785     return;
   1786   OpenMPSchedType Schedule =
   1787       getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
   1788   assert(Ordered ||
   1789          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
   1790           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
   1791   // Call __kmpc_dispatch_init(
   1792   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
   1793   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
   1794   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
   1795 
   1796   // If the Chunk was not specified in the clause - use default value 1.
   1797   if (Chunk == nullptr)
   1798     Chunk = CGF.Builder.getIntN(IVSize, 1);
   1799   llvm::Value *Args[] = {
   1800     emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
   1801     getThreadID(CGF, Loc),
   1802     CGF.Builder.getInt32(Schedule), // Schedule type
   1803     CGF.Builder.getIntN(IVSize, 0), // Lower
   1804     UB,                             // Upper
   1805     CGF.Builder.getIntN(IVSize, 1), // Stride
   1806     Chunk                           // Chunk
   1807   };
   1808   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
   1809 }
   1810 
   1811 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
   1812                                         SourceLocation Loc,
   1813                                         OpenMPScheduleClauseKind ScheduleKind,
   1814                                         unsigned IVSize, bool IVSigned,
   1815                                         bool Ordered, Address IL, Address LB,
   1816                                         Address UB, Address ST,
   1817                                         llvm::Value *Chunk) {
   1818   if (!CGF.HaveInsertPoint())
   1819     return;
   1820   OpenMPSchedType Schedule =
   1821     getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
   1822   assert(!Ordered);
   1823   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
   1824          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked);
   1825 
   1826   // Call __kmpc_for_static_init(
   1827   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
   1828   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
   1829   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
   1830   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
   1831   if (Chunk == nullptr) {
   1832     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
   1833            "expected static non-chunked schedule");
   1834     // If the Chunk was not specified in the clause - use default value 1.
   1835       Chunk = CGF.Builder.getIntN(IVSize, 1);
   1836   } else {
   1837     assert((Schedule == OMP_sch_static_chunked ||
   1838             Schedule == OMP_ord_static_chunked) &&
   1839            "expected static chunked schedule");
   1840   }
   1841   llvm::Value *Args[] = {
   1842     emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
   1843     getThreadID(CGF, Loc),
   1844     CGF.Builder.getInt32(Schedule), // Schedule type
   1845     IL.getPointer(),                // &isLastIter
   1846     LB.getPointer(),                // &LB
   1847     UB.getPointer(),                // &UB
   1848     ST.getPointer(),                // &Stride
   1849     CGF.Builder.getIntN(IVSize, 1), // Incr
   1850     Chunk                           // Chunk
   1851   };
   1852   CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
   1853 }
   1854 
   1855 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
   1856                                           SourceLocation Loc) {
   1857   if (!CGF.HaveInsertPoint())
   1858     return;
   1859   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
   1860   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
   1861                          getThreadID(CGF, Loc)};
   1862   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
   1863                       Args);
   1864 }
   1865 
   1866 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
   1867                                                  SourceLocation Loc,
   1868                                                  unsigned IVSize,
   1869                                                  bool IVSigned) {
   1870   if (!CGF.HaveInsertPoint())
   1871     return;
   1872   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
   1873   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
   1874                          getThreadID(CGF, Loc)};
   1875   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
   1876 }
   1877 
   1878 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
   1879                                           SourceLocation Loc, unsigned IVSize,
   1880                                           bool IVSigned, Address IL,
   1881                                           Address LB, Address UB,
   1882                                           Address ST) {
   1883   // Call __kmpc_dispatch_next(
   1884   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
   1885   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
   1886   //          kmp_int[32|64] *p_stride);
   1887   llvm::Value *Args[] = {
   1888       emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
   1889       IL.getPointer(), // &isLastIter
   1890       LB.getPointer(), // &Lower
   1891       UB.getPointer(), // &Upper
   1892       ST.getPointer()  // &Stride
   1893   };
   1894   llvm::Value *Call =
   1895       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
   1896   return CGF.EmitScalarConversion(
   1897       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
   1898       CGF.getContext().BoolTy, Loc);
   1899 }
   1900 
   1901 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
   1902                                            llvm::Value *NumThreads,
   1903                                            SourceLocation Loc) {
   1904   if (!CGF.HaveInsertPoint())
   1905     return;
   1906   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
   1907   llvm::Value *Args[] = {
   1908       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
   1909       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
   1910   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
   1911                       Args);
   1912 }
   1913 
   1914 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
   1915                                          OpenMPProcBindClauseKind ProcBind,
   1916                                          SourceLocation Loc) {
   1917   if (!CGF.HaveInsertPoint())
   1918     return;
   1919   // Constants for proc bind value accepted by the runtime.
   1920   enum ProcBindTy {
   1921     ProcBindFalse = 0,
   1922     ProcBindTrue,
   1923     ProcBindMaster,
   1924     ProcBindClose,
   1925     ProcBindSpread,
   1926     ProcBindIntel,
   1927     ProcBindDefault
   1928   } RuntimeProcBind;
   1929   switch (ProcBind) {
   1930   case OMPC_PROC_BIND_master:
   1931     RuntimeProcBind = ProcBindMaster;
   1932     break;
   1933   case OMPC_PROC_BIND_close:
   1934     RuntimeProcBind = ProcBindClose;
   1935     break;
   1936   case OMPC_PROC_BIND_spread:
   1937     RuntimeProcBind = ProcBindSpread;
   1938     break;
   1939   case OMPC_PROC_BIND_unknown:
   1940     llvm_unreachable("Unsupported proc_bind value.");
   1941   }
   1942   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
   1943   llvm::Value *Args[] = {
   1944       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
   1945       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
   1946   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
   1947 }
   1948 
   1949 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
   1950                                 SourceLocation Loc) {
   1951   if (!CGF.HaveInsertPoint())
   1952     return;
   1953   // Build call void __kmpc_flush(ident_t *loc)
   1954   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
   1955                       emitUpdateLocation(CGF, Loc));
   1956 }
   1957 
   1958 namespace {
   1959 /// \brief Indexes of fields for type kmp_task_t.
   1960 enum KmpTaskTFields {
   1961   /// \brief List of shared variables.
   1962   KmpTaskTShareds,
   1963   /// \brief Task routine.
   1964   KmpTaskTRoutine,
   1965   /// \brief Partition id for the untied tasks.
   1966   KmpTaskTPartId,
   1967   /// \brief Function with call of destructors for private variables.
   1968   KmpTaskTDestructors,
   1969 };
   1970 } // anonymous namespace
   1971 
   1972 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
   1973   if (!KmpRoutineEntryPtrTy) {
   1974     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
   1975     auto &C = CGM.getContext();
   1976     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
   1977     FunctionProtoType::ExtProtoInfo EPI;
   1978     KmpRoutineEntryPtrQTy = C.getPointerType(
   1979         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
   1980     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
   1981   }
   1982 }
   1983 
   1984 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
   1985                                        QualType FieldTy) {
   1986   auto *Field = FieldDecl::Create(
   1987       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
   1988       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
   1989       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
   1990   Field->setAccess(AS_public);
   1991   DC->addDecl(Field);
   1992   return Field;
   1993 }
   1994 
   1995 namespace {
   1996 struct PrivateHelpersTy {
   1997   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
   1998                    const VarDecl *PrivateElemInit)
   1999       : Original(Original), PrivateCopy(PrivateCopy),
   2000         PrivateElemInit(PrivateElemInit) {}
   2001   const VarDecl *Original;
   2002   const VarDecl *PrivateCopy;
   2003   const VarDecl *PrivateElemInit;
   2004 };
   2005 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
   2006 } // anonymous namespace
   2007 
   2008 static RecordDecl *
   2009 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
   2010   if (!Privates.empty()) {
   2011     auto &C = CGM.getContext();
   2012     // Build struct .kmp_privates_t. {
   2013     //         /*  private vars  */
   2014     //       };
   2015     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
   2016     RD->startDefinition();
   2017     for (auto &&Pair : Privates) {
   2018       auto *VD = Pair.second.Original;
   2019       auto Type = VD->getType();
   2020       Type = Type.getNonReferenceType();
   2021       auto *FD = addFieldToRecordDecl(C, RD, Type);
   2022       if (VD->hasAttrs()) {
   2023         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
   2024              E(VD->getAttrs().end());
   2025              I != E; ++I)
   2026           FD->addAttr(*I);
   2027       }
   2028     }
   2029     RD->completeDefinition();
   2030     return RD;
   2031   }
   2032   return nullptr;
   2033 }
   2034 
   2035 static RecordDecl *
   2036 createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
   2037                          QualType KmpRoutineEntryPointerQTy) {
   2038   auto &C = CGM.getContext();
   2039   // Build struct kmp_task_t {
   2040   //         void *              shareds;
   2041   //         kmp_routine_entry_t routine;
   2042   //         kmp_int32           part_id;
   2043   //         kmp_routine_entry_t destructors;
   2044   //       };
   2045   auto *RD = C.buildImplicitRecord("kmp_task_t");
   2046   RD->startDefinition();
   2047   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
   2048   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
   2049   addFieldToRecordDecl(C, RD, KmpInt32Ty);
   2050   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
   2051   RD->completeDefinition();
   2052   return RD;
   2053 }
   2054 
   2055 static RecordDecl *
   2056 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
   2057                                      ArrayRef<PrivateDataTy> Privates) {
   2058   auto &C = CGM.getContext();
   2059   // Build struct kmp_task_t_with_privates {
   2060   //         kmp_task_t task_data;
   2061   //         .kmp_privates_t. privates;
   2062   //       };
   2063   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
   2064   RD->startDefinition();
   2065   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
   2066   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
   2067     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
   2068   }
   2069   RD->completeDefinition();
   2070   return RD;
   2071 }
   2072 
   2073 /// \brief Emit a proxy function which accepts kmp_task_t as the second
   2074 /// argument.
   2075 /// \code
   2076 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
   2077 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
   2078 ///   tt->shareds);
   2079 ///   return 0;
   2080 /// }
   2081 /// \endcode
   2082 static llvm::Value *
   2083 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
   2084                       QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
   2085                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
   2086                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
   2087                       llvm::Value *TaskPrivatesMap) {
   2088   auto &C = CGM.getContext();
   2089   FunctionArgList Args;
   2090   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
   2091   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
   2092                                 /*Id=*/nullptr,
   2093                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
   2094   Args.push_back(&GtidArg);
   2095   Args.push_back(&TaskTypeArg);
   2096   FunctionType::ExtInfo Info;
   2097   auto &TaskEntryFnInfo =
   2098       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
   2099                                                     /*isVariadic=*/false);
   2100   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
   2101   auto *TaskEntry =
   2102       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
   2103                              ".omp_task_entry.", &CGM.getModule());
   2104   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
   2105   CodeGenFunction CGF(CGM);
   2106   CGF.disableDebugInfo();
   2107   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
   2108 
   2109   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
   2110   // tt->task_data.shareds);
   2111   auto *GtidParam = CGF.EmitLoadOfScalar(
   2112       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
   2113   LValue TDBase = emitLoadOfPointerLValue(
   2114       CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
   2115   auto *KmpTaskTWithPrivatesQTyRD =
   2116       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
   2117   LValue Base =
   2118       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
   2119   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
   2120   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
   2121   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
   2122   auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();
   2123 
   2124   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
   2125   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
   2126   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   2127       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
   2128       CGF.ConvertTypeForMem(SharedsPtrTy));
   2129 
   2130   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
   2131   llvm::Value *PrivatesParam;
   2132   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
   2133     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
   2134     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   2135         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
   2136   } else {
   2137     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
   2138   }
   2139 
   2140   llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
   2141                              TaskPrivatesMap, SharedsParam};
   2142   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
   2143   CGF.EmitStoreThroughLValue(
   2144       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
   2145       CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
   2146   CGF.FinishFunction();
   2147   return TaskEntry;
   2148 }
   2149 
   2150 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
   2151                                             SourceLocation Loc,
   2152                                             QualType KmpInt32Ty,
   2153                                             QualType KmpTaskTWithPrivatesPtrQTy,
   2154                                             QualType KmpTaskTWithPrivatesQTy) {
   2155   auto &C = CGM.getContext();
   2156   FunctionArgList Args;
   2157   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
   2158   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
   2159                                 /*Id=*/nullptr,
   2160                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
   2161   Args.push_back(&GtidArg);
   2162   Args.push_back(&TaskTypeArg);
   2163   FunctionType::ExtInfo Info;
   2164   auto &DestructorFnInfo =
   2165       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
   2166                                                     /*isVariadic=*/false);
   2167   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
   2168   auto *DestructorFn =
   2169       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
   2170                              ".omp_task_destructor.", &CGM.getModule());
   2171   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
   2172                                     DestructorFnInfo);
   2173   CodeGenFunction CGF(CGM);
   2174   CGF.disableDebugInfo();
   2175   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
   2176                     Args);
   2177 
   2178   LValue Base = emitLoadOfPointerLValue(
   2179       CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
   2180   auto *KmpTaskTWithPrivatesQTyRD =
   2181       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
   2182   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
   2183   Base = CGF.EmitLValueForField(Base, *FI);
   2184   for (auto *Field :
   2185        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
   2186     if (auto DtorKind = Field->getType().isDestructedType()) {
   2187       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
   2188       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
   2189     }
   2190   }
   2191   CGF.FinishFunction();
   2192   return DestructorFn;
   2193 }
   2194 
   2195 /// \brief Emit a privates mapping function for correct handling of private and
   2196 /// firstprivate variables.
   2197 /// \code
   2198 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
   2199 /// **noalias priv1,...,  <tyn> **noalias privn) {
   2200 ///   *priv1 = &.privates.priv1;
   2201 ///   ...;
   2202 ///   *privn = &.privates.privn;
   2203 /// }
   2204 /// \endcode
   2205 static llvm::Value *
   2206 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
   2207                                ArrayRef<const Expr *> PrivateVars,
   2208                                ArrayRef<const Expr *> FirstprivateVars,
   2209                                QualType PrivatesQTy,
   2210                                ArrayRef<PrivateDataTy> Privates) {
   2211   auto &C = CGM.getContext();
   2212   FunctionArgList Args;
   2213   ImplicitParamDecl TaskPrivatesArg(
   2214       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
   2215       C.getPointerType(PrivatesQTy).withConst().withRestrict());
   2216   Args.push_back(&TaskPrivatesArg);
   2217   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
   2218   unsigned Counter = 1;
   2219   for (auto *E: PrivateVars) {
   2220     Args.push_back(ImplicitParamDecl::Create(
   2221         C, /*DC=*/nullptr, Loc,
   2222         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
   2223                             .withConst()
   2224                             .withRestrict()));
   2225     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
   2226     PrivateVarsPos[VD] = Counter;
   2227     ++Counter;
   2228   }
   2229   for (auto *E : FirstprivateVars) {
   2230     Args.push_back(ImplicitParamDecl::Create(
   2231         C, /*DC=*/nullptr, Loc,
   2232         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
   2233                             .withConst()
   2234                             .withRestrict()));
   2235     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
   2236     PrivateVarsPos[VD] = Counter;
   2237     ++Counter;
   2238   }
   2239   FunctionType::ExtInfo Info;
   2240   auto &TaskPrivatesMapFnInfo =
   2241       CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info,
   2242                                                     /*isVariadic=*/false);
   2243   auto *TaskPrivatesMapTy =
   2244       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
   2245   auto *TaskPrivatesMap = llvm::Function::Create(
   2246       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
   2247       ".omp_task_privates_map.", &CGM.getModule());
   2248   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
   2249                                     TaskPrivatesMapFnInfo);
   2250   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
   2251   CodeGenFunction CGF(CGM);
   2252   CGF.disableDebugInfo();
   2253   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
   2254                     TaskPrivatesMapFnInfo, Args);
   2255 
   2256   // *privi = &.privates.privi;
   2257   LValue Base = emitLoadOfPointerLValue(
   2258       CGF, CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType());
   2259   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
   2260   Counter = 0;
   2261   for (auto *Field : PrivatesQTyRD->fields()) {
   2262     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
   2263     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
   2264     auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
   2265     auto RefLoadLVal =
   2266         emitLoadOfPointerLValue(CGF, RefLVal.getAddress(), RefLVal.getType());
   2267     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
   2268     ++Counter;
   2269   }
   2270   CGF.FinishFunction();
   2271   return TaskPrivatesMap;
   2272 }
   2273 
   2274 static int array_pod_sort_comparator(const PrivateDataTy *P1,
   2275                                      const PrivateDataTy *P2) {
   2276   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
   2277 }
   2278 
   2279 void CGOpenMPRuntime::emitTaskCall(
   2280     CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
   2281     bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
   2282     llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
   2283     const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
   2284     ArrayRef<const Expr *> PrivateCopies,
   2285     ArrayRef<const Expr *> FirstprivateVars,
   2286     ArrayRef<const Expr *> FirstprivateCopies,
   2287     ArrayRef<const Expr *> FirstprivateInits,
   2288     ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
   2289   if (!CGF.HaveInsertPoint())
   2290     return;
   2291   auto &C = CGM.getContext();
   2292   llvm::SmallVector<PrivateDataTy, 8> Privates;
   2293   // Aggregate privates and sort them by the alignment.
   2294   auto I = PrivateCopies.begin();
   2295   for (auto *E : PrivateVars) {
   2296     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
   2297     Privates.push_back(std::make_pair(
   2298         C.getDeclAlign(VD),
   2299         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
   2300                          /*PrivateElemInit=*/nullptr)));
   2301     ++I;
   2302   }
   2303   I = FirstprivateCopies.begin();
   2304   auto IElemInitRef = FirstprivateInits.begin();
   2305   for (auto *E : FirstprivateVars) {
   2306     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
   2307     Privates.push_back(std::make_pair(
   2308         C.getDeclAlign(VD),
   2309         PrivateHelpersTy(
   2310             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
   2311             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
   2312     ++I, ++IElemInitRef;
   2313   }
   2314   llvm::array_pod_sort(Privates.begin(), Privates.end(),
   2315                        array_pod_sort_comparator);
   2316   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
   2317   // Build type kmp_routine_entry_t (if not built yet).
   2318   emitKmpRoutineEntryT(KmpInt32Ty);
   2319   // Build type kmp_task_t (if not built yet).
   2320   if (KmpTaskTQTy.isNull()) {
   2321     KmpTaskTQTy = C.getRecordType(
   2322         createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
   2323   }
   2324   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
   2325   // Build particular struct kmp_task_t for the given task.
   2326   auto *KmpTaskTWithPrivatesQTyRD =
   2327       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
   2328   auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
   2329   QualType KmpTaskTWithPrivatesPtrQTy =
   2330       C.getPointerType(KmpTaskTWithPrivatesQTy);
   2331   auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
   2332   auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
   2333   auto *KmpTaskTWithPrivatesTySize = getTypeSize(CGF, KmpTaskTWithPrivatesQTy);
   2334   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
   2335 
   2336   // Emit initial values for private copies (if any).
   2337   llvm::Value *TaskPrivatesMap = nullptr;
   2338   auto *TaskPrivatesMapTy =
   2339       std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
   2340                 3)
   2341           ->getType();
   2342   if (!Privates.empty()) {
   2343     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
   2344     TaskPrivatesMap = emitTaskPrivateMappingFunction(
   2345         CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates);
   2346     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   2347         TaskPrivatesMap, TaskPrivatesMapTy);
   2348   } else {
   2349     TaskPrivatesMap = llvm::ConstantPointerNull::get(
   2350         cast<llvm::PointerType>(TaskPrivatesMapTy));
   2351   }
   2352   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
   2353   // kmp_task_t *tt);
   2354   auto *TaskEntry = emitProxyTaskFunction(
   2355       CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
   2356       KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);
   2357 
   2358   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
   2359   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
   2360   // kmp_routine_entry_t *task_entry);
   2361   // Task flags. Format is taken from
   2362   // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
   2363   // description of kmp_tasking_flags struct.
   2364   const unsigned TiedFlag = 0x1;
   2365   const unsigned FinalFlag = 0x2;
   2366   unsigned Flags = Tied ? TiedFlag : 0;
   2367   auto *TaskFlags =
   2368       Final.getPointer()
   2369           ? CGF.Builder.CreateSelect(Final.getPointer(),
   2370                                      CGF.Builder.getInt32(FinalFlag),
   2371                                      CGF.Builder.getInt32(/*C=*/0))
   2372           : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
   2373   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
   2374   auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
   2375   llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
   2376                               getThreadID(CGF, Loc), TaskFlags,
   2377                               KmpTaskTWithPrivatesTySize, SharedsSize,
   2378                               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   2379                                   TaskEntry, KmpRoutineEntryPtrTy)};
   2380   auto *NewTask = CGF.EmitRuntimeCall(
   2381       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
   2382   auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   2383       NewTask, KmpTaskTWithPrivatesPtrTy);
   2384   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
   2385                                                KmpTaskTWithPrivatesQTy);
   2386   LValue TDBase =
   2387       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
   2388   // Fill the data in the resulting kmp_task_t record.
   2389   // Copy shareds if there are any.
   2390   Address KmpTaskSharedsPtr = Address::invalid();
   2391   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
   2392     KmpTaskSharedsPtr =
   2393         Address(CGF.EmitLoadOfScalar(
   2394                     CGF.EmitLValueForField(
   2395                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
   2396                                            KmpTaskTShareds)),
   2397                     Loc),
   2398                 CGF.getNaturalTypeAlignment(SharedsTy));
   2399     CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
   2400   }
   2401   // Emit initial values for private copies (if any).
   2402   bool NeedsCleanup = false;
   2403   if (!Privates.empty()) {
   2404     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
   2405     auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
   2406     FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
   2407     LValue SharedsBase;
   2408     if (!FirstprivateVars.empty()) {
   2409       SharedsBase = CGF.MakeAddrLValue(
   2410           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   2411               KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
   2412           SharedsTy);
   2413     }
   2414     CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
   2415         cast<CapturedStmt>(*D.getAssociatedStmt()));
   2416     for (auto &&Pair : Privates) {
   2417       auto *VD = Pair.second.PrivateCopy;
   2418       auto *Init = VD->getAnyInitializer();
   2419       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
   2420       if (Init) {
   2421         if (auto *Elem = Pair.second.PrivateElemInit) {
   2422           auto *OriginalVD = Pair.second.Original;
   2423           auto *SharedField = CapturesInfo.lookup(OriginalVD);
   2424           auto SharedRefLValue =
   2425               CGF.EmitLValueForField(SharedsBase, SharedField);
   2426           SharedRefLValue = CGF.MakeAddrLValue(
   2427               Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
   2428               SharedRefLValue.getType(), AlignmentSource::Decl);
   2429           QualType Type = OriginalVD->getType();
   2430           if (Type->isArrayType()) {
   2431             // Initialize firstprivate array.
   2432             if (!isa<CXXConstructExpr>(Init) ||
   2433                 CGF.isTrivialInitializer(Init)) {
   2434               // Perform simple memcpy.
   2435               CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
   2436                                       SharedRefLValue.getAddress(), Type);
   2437             } else {
   2438               // Initialize firstprivate array using element-by-element
   2439               // initialization.
   2440               CGF.EmitOMPAggregateAssign(
   2441                   PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
   2442                   Type, [&CGF, Elem, Init, &CapturesInfo](
   2443                             Address DestElement, Address SrcElement) {
   2444                     // Clean up any temporaries needed by the initialization.
   2445                     CodeGenFunction::OMPPrivateScope InitScope(CGF);
   2446                     InitScope.addPrivate(Elem, [SrcElement]() -> Address {
   2447                       return SrcElement;
   2448                     });
   2449                     (void)InitScope.Privatize();
   2450                     // Emit initialization for single element.
   2451                     CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
   2452                         CGF, &CapturesInfo);
   2453                     CGF.EmitAnyExprToMem(Init, DestElement,
   2454                                          Init->getType().getQualifiers(),
   2455                                          /*IsInitializer=*/false);
   2456                   });
   2457             }
   2458           } else {
   2459             CodeGenFunction::OMPPrivateScope InitScope(CGF);
   2460             InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
   2461               return SharedRefLValue.getAddress();
   2462             });
   2463             (void)InitScope.Privatize();
   2464             CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
   2465             CGF.EmitExprAsInit(Init, VD, PrivateLValue,
   2466                                /*capturedByInit=*/false);
   2467           }
   2468         } else {
   2469           CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
   2470         }
   2471       }
   2472       NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
   2473       ++FI;
   2474     }
   2475   }
   2476   // Provide pointer to function with destructors for privates.
   2477   llvm::Value *DestructorFn =
   2478       NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
   2479                                              KmpTaskTWithPrivatesPtrQTy,
   2480                                              KmpTaskTWithPrivatesQTy)
   2481                    : llvm::ConstantPointerNull::get(
   2482                          cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
   2483   LValue Destructor = CGF.EmitLValueForField(
   2484       TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
   2485   CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   2486                             DestructorFn, KmpRoutineEntryPtrTy),
   2487                         Destructor);
   2488 
   2489   // Process list of dependences.
   2490   Address DependenciesArray = Address::invalid();
   2491   unsigned NumDependencies = Dependences.size();
   2492   if (NumDependencies) {
   2493     // Dependence kind for RTL.
   2494     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
   2495     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
   2496     RecordDecl *KmpDependInfoRD;
   2497     QualType FlagsTy =
   2498         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
   2499     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
   2500     if (KmpDependInfoTy.isNull()) {
   2501       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
   2502       KmpDependInfoRD->startDefinition();
   2503       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
   2504       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
   2505       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
   2506       KmpDependInfoRD->completeDefinition();
   2507       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
   2508     } else {
   2509       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
   2510     }
   2511     CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
   2512     // Define type kmp_depend_info[<Dependences.size()>];
   2513     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
   2514         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
   2515         ArrayType::Normal, /*IndexTypeQuals=*/0);
   2516     // kmp_depend_info[<Dependences.size()>] deps;
   2517     DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy);
   2518     for (unsigned i = 0; i < NumDependencies; ++i) {
   2519       const Expr *E = Dependences[i].second;
   2520       auto Addr = CGF.EmitLValue(E);
   2521       llvm::Value *Size;
   2522       QualType Ty = E->getType();
   2523       if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
   2524         LValue UpAddrLVal =
   2525             CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
   2526         llvm::Value *UpAddr =
   2527             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
   2528         llvm::Value *LowIntPtr =
   2529             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
   2530         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
   2531         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
   2532       } else
   2533         Size = getTypeSize(CGF, Ty);
   2534       auto Base = CGF.MakeAddrLValue(
   2535           CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
   2536           KmpDependInfoTy);
   2537       // deps[i].base_addr = &<Dependences[i].second>;
   2538       auto BaseAddrLVal = CGF.EmitLValueForField(
   2539           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
   2540       CGF.EmitStoreOfScalar(
   2541           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
   2542           BaseAddrLVal);
   2543       // deps[i].len = sizeof(<Dependences[i].second>);
   2544       auto LenLVal = CGF.EmitLValueForField(
   2545           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
   2546       CGF.EmitStoreOfScalar(Size, LenLVal);
   2547       // deps[i].flags = <Dependences[i].first>;
   2548       RTLDependenceKindTy DepKind;
   2549       switch (Dependences[i].first) {
   2550       case OMPC_DEPEND_in:
   2551         DepKind = DepIn;
   2552         break;
   2553       // Out and InOut dependencies must use the same code.
   2554       case OMPC_DEPEND_out:
   2555       case OMPC_DEPEND_inout:
   2556         DepKind = DepInOut;
   2557         break;
   2558       case OMPC_DEPEND_source:
   2559       case OMPC_DEPEND_unknown:
   2560         llvm_unreachable("Unknown task dependence type");
   2561       }
   2562       auto FlagsLVal = CGF.EmitLValueForField(
   2563           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
   2564       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
   2565                             FlagsLVal);
   2566     }
   2567     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   2568         CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
   2569         CGF.VoidPtrTy);
   2570   }
   2571 
   2572   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
   2573   // libcall.
   2574   // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
   2575   // *new_task);
   2576   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
   2577   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
   2578   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
   2579   // list is not empty
   2580   auto *ThreadID = getThreadID(CGF, Loc);
   2581   auto *UpLoc = emitUpdateLocation(CGF, Loc);
   2582   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
   2583   llvm::Value *DepTaskArgs[7];
   2584   if (NumDependencies) {
   2585     DepTaskArgs[0] = UpLoc;
   2586     DepTaskArgs[1] = ThreadID;
   2587     DepTaskArgs[2] = NewTask;
   2588     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
   2589     DepTaskArgs[4] = DependenciesArray.getPointer();
   2590     DepTaskArgs[5] = CGF.Builder.getInt32(0);
   2591     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
   2592   }
   2593   auto &&ThenCodeGen = [this, NumDependencies,
   2594                         &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) {
   2595     // TODO: add check for untied tasks.
   2596     if (NumDependencies) {
   2597       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps),
   2598                           DepTaskArgs);
   2599     } else {
   2600       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
   2601                           TaskArgs);
   2602     }
   2603   };
   2604   typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
   2605       IfCallEndCleanup;
   2606 
   2607   llvm::Value *DepWaitTaskArgs[6];
   2608   if (NumDependencies) {
   2609     DepWaitTaskArgs[0] = UpLoc;
   2610     DepWaitTaskArgs[1] = ThreadID;
   2611     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
   2612     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
   2613     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
   2614     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
   2615   }
   2616   auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
   2617                         NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) {
   2618     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
   2619     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
   2620     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
   2621     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
   2622     // is specified.
   2623     if (NumDependencies)
   2624       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
   2625                           DepWaitTaskArgs);
   2626     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
   2627     // kmp_task_t *new_task);
   2628     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0),
   2629                         TaskArgs);
   2630     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
   2631     // kmp_task_t *new_task);
   2632     CGF.EHStack.pushCleanup<IfCallEndCleanup>(
   2633         NormalAndEHCleanup,
   2634         createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
   2635         llvm::makeArrayRef(TaskArgs));
   2636 
   2637     // Call proxy_task_entry(gtid, new_task);
   2638     llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
   2639     CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
   2640   };
   2641 
   2642   if (IfCond) {
   2643     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
   2644   } else {
   2645     CodeGenFunction::RunCleanupsScope Scope(CGF);
   2646     ThenCodeGen(CGF);
   2647   }
   2648 }
   2649 
   2650 /// \brief Emit reduction operation for each element of array (required for
   2651 /// array sections) LHS op = RHS.
        ///
        /// The emitted code first compares the begin and one-past-the-end pointers of
        /// the LHS array and skips the loop when they are equal. Otherwise it runs a
        /// loop with the exit test at the bottom; on each iteration LHSVar and RHSVar
        /// are privatized to the addresses of the current elements and \p RedOpGen is
        /// invoked once.
   2652 /// \param Type Type of array.
   2653 /// \param LHSVar Variable on the left side of the reduction operation
   2654 /// (references element of array in original variable).
   2655 /// \param RHSVar Variable on the right side of the reduction operation
   2656 /// (references element of array in original variable).
        /// NOTE(review): the RHSVar text repeats the LHSVar text verbatim; presumably
        /// RHSVar refers to the element of the private copy -- TODO confirm.
   2657 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
   2658 /// RHSVar.
        /// \param XExpr Forwarded unchanged to \p RedOpGen (null by default).
        /// \param EExpr Forwarded unchanged to \p RedOpGen (null by default).
        /// \param UpExpr Forwarded unchanged to \p RedOpGen (null by default).
   2659 static void EmitOMPAggregateReduction(
   2660     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
   2661     const VarDecl *RHSVar,
   2662     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
   2663                                   const Expr *, const Expr *)> &RedOpGen,
   2664     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
   2665     const Expr *UpExpr = nullptr) {
   2666   // Perform element-by-element reduction.
   2667   QualType ElementTy;
   2668   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
   2669   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
   2670 
   2671   // Drill down to the base element type and take the element count from the
          // LHS array only; the RHS array is walked in lockstep with it below.
          // emitArrayLength also fills in ElementTy.
   2672   auto ArrayTy = Type->getAsArrayTypeUnsafe();
   2673   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
   2674 
   2675   auto RHSBegin = RHSAddr.getPointer();
   2676   auto LHSBegin = LHSAddr.getPointer();
   2677   // One-past-the-end pointer of the LHS array; it is the only loop bound.
   2678   auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
   2679   // The basic structure here is an emptiness check followed by a loop that is
          // tested at the bottom.
   2680   auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
   2681   auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
   2682   auto IsEmpty =
   2683       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
   2684   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
   2685 
   2686   // Enter the loop body, making that address the current address.
          // EntryBB is the block that ends with the branch above; it supplies the
          // first incoming value of both PHI nodes.
   2687   auto EntryBB = CGF.Builder.GetInsertBlock();
   2688   CGF.EmitBlock(BodyBB);
   2689 
   2690   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
   2691 
          // PHI carrying the pointer to the current RHS element across iterations.
   2692   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
   2693       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
   2694   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
   2695   Address RHSElementCurrent =
   2696       Address(RHSElementPHI,
   2697               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
   2698 
          // PHI carrying the pointer to the current LHS element across iterations.
   2699   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
   2700       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
   2701   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
   2702   Address LHSElementCurrent =
   2703       Address(LHSElementPHI,
   2704               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
   2705 
   2706   // Emit the reduction for the current pair of elements: LHSVar and RHSVar are
          // remapped to the element addresses while RedOpGen runs, and the remapping
          // is dropped again by ForceCleanup before the pointers are advanced.
   2707   CodeGenFunction::OMPPrivateScope Scope(CGF);
   2708   Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
   2709   Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
   2710   Scope.Privatize();
   2711   RedOpGen(CGF, XExpr, EExpr, UpExpr);
   2712   Scope.ForceCleanup();
   2713 
   2714   // Shift the address forward by one element.
   2715   auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
   2716       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
   2717   auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
   2718       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
   2719   // Check whether we've reached the end.
   2720   auto Done =
   2721       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
   2722   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
          // The back edge comes from the current insert block, which need not be
          // BodyBB if RedOpGen created additional blocks.
   2723   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
   2724   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
   2725 
   2726   // Done.
   2727   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
   2728 }
   2729 
   2730 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
   2731                                           llvm::Type *ArgsType,
   2732                                           ArrayRef<const Expr *> Privates,
   2733                                           ArrayRef<const Expr *> LHSExprs,
   2734                                           ArrayRef<const Expr *> RHSExprs,
   2735                                           ArrayRef<const Expr *> ReductionOps) {
          // Emits the internal-linkage helper .omp.reduction.reduction_func with the
          // signature void(void *LHSArg, void *RHSArg) and returns it.
          //  - ArgsType: pointer type both arguments are cast to before indexing.
          //  - Privates: only the types are inspected, to detect array-typed items.
          //  - LHSExprs/RHSExprs: must be DeclRefExprs naming VarDecls (the cast<>
          //    calls are unchecked); inside the helper each variable is remapped to
          //    the address obtained from the matching slot of the LHS/RHS argument.
          //  - ReductionOps: combining expressions, emitted once per item, or once
          //    per element for array-typed items.
   2736   auto &C = CGM.getContext();
   2737 
   2738   // void reduction_func(void *LHSArg, void *RHSArg);
   2739   FunctionArgList Args;
   2740   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
   2741                            C.VoidPtrTy);
   2742   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
   2743                            C.VoidPtrTy);
   2744   Args.push_back(&LHSArg);
   2745   Args.push_back(&RHSArg);
   2746   FunctionType::ExtInfo EI;
   2747   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
   2748       C.VoidTy, Args, EI, /*isVariadic=*/false);
   2749   auto *Fn = llvm::Function::Create(
   2750       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
   2751       ".omp.reduction.reduction_func", &CGM.getModule());
   2752   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
          // The helper body is emitted with its own CodeGenFunction instance.
   2753   CodeGenFunction CGF(CGM);
   2754   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
   2755 
   2756   // Dst = (void*[n])(LHSArg);
   2757   // Src = (void*[n])(RHSArg);
   2758   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   2759       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
   2760       ArgsType), CGF.getPointerAlign());
   2761   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   2762       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
   2763       ArgsType), CGF.getPointerAlign());
   2764 
   2765   //  ...
   2766   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
   2767   //  ...
   2768   CodeGenFunction::OMPPrivateScope Scope(CGF);
   2769   auto IPriv = Privates.begin();
          // I counts reduction items, Idx counts slots in the argument arrays. They
          // diverge because every array-typed item takes one extra slot for its size.
          // NOTE(review): the lambdas below capture Idx by reference; this relies on
          // addPrivate running the callback before Idx is advanced -- confirm.
   2770   unsigned Idx = 0;
   2771   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
   2772     auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
   2773     Scope.addPrivate(RHSVar, [&]() -> Address {
   2774       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
   2775     });
   2776     auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
   2777     Scope.addPrivate(LHSVar, [&]() -> Address {
   2778       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
   2779     });
   2780     QualType PrivTy = (*IPriv)->getType();
   2781     if (PrivTy->isArrayType()) {
   2782       // Get array size and emit VLA type.
              // The next slot of the LHS array holds the size encoded as a pointer;
              // it is converted back to an integer and bound to the size expression
              // of the VLA type before the type is emitted.
              // NOTE(review): this assumes every array-typed private is a VLA whose
              // size expression is an OpaqueValueExpr (getAsVariableArrayType is
              // dereferenced without a null check and the cast<> is unchecked).
   2783       ++Idx;
   2784       Address Elem =
   2785           CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
   2786       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
   2787       CodeGenFunction::OpaqueValueMapping OpaqueMap(
   2788           CGF,
   2789           cast<OpaqueValueExpr>(
   2790               CGF.getContext().getAsVariableArrayType(PrivTy)->getSizeExpr()),
   2791           RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
   2792       CGF.EmitVariablyModifiedType(PrivTy);
   2793     }
   2794   }
   2795   Scope.Privatize();
          // Second pass: emit the combining code with the remapped variables. The
          // three iterators advance together, one step per reduction operation.
   2796   IPriv = Privates.begin();
   2797   auto ILHS = LHSExprs.begin();
   2798   auto IRHS = RHSExprs.begin();
   2799   for (auto *E : ReductionOps) {
   2800     if ((*IPriv)->getType()->isArrayType()) {
   2801       // Emit reduction for array section.
   2802       auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
   2803       auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
   2804       EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
   2805                                 [=](CodeGenFunction &CGF, const Expr *,
   2806                                     const Expr *,
   2807                                     const Expr *) { CGF.EmitIgnoredExpr(E); });
   2808     } else
   2809       // Emit reduction for array subscript or single variable.
   2810       CGF.EmitIgnoredExpr(E);
   2811     ++IPriv, ++ILHS, ++IRHS;
   2812   }
   2813   Scope.ForceCleanup();
   2814   CGF.FinishFunction();
   2815   return Fn;
   2816 }
   2817 
   2818 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
   2819                                     ArrayRef<const Expr *> Privates,
   2820                                     ArrayRef<const Expr *> LHSExprs,
   2821                                     ArrayRef<const Expr *> RHSExprs,
   2822                                     ArrayRef<const Expr *> ReductionOps,
   2823                                     bool WithNowait, bool SimpleReduction) {
   2824   if (!CGF.HaveInsertPoint())
   2825     return;
          // Parameters, as used below (the four lists are walked in parallel):
          //  - Privates[i]: only its type is used; an array type selects the
          //    element-by-element path for item i.
          //  - LHSExprs[i] / RHSExprs[i]: treated as DeclRefExprs naming VarDecls
          //    wherever the variable itself is needed (cast<> without a check).
          //  - ReductionOps[i]: the expression emitted to combine item i.
          //  - WithNowait: selects the _nowait runtime entry points.
          //  - SimpleReduction: emit only the combining expressions, with no runtime
          //    calls at all.
   2826   // Next code should be emitted for reduction:
   2827   //
   2828   // static kmp_critical_name lock = { 0 };
   2829   //
   2830   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
   2831   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
   2832   //  ...
   2833   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
   2834   //  *(Type<n>-1*)rhs[<n>-1]);
   2835   // }
   2836   //
   2837   // ...
   2838   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
   2839   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
   2840   // RedList, reduce_func, &<lock>)) {
   2841   // case 1:
   2842   //  ...
   2843   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
   2844   //  ...
   2845   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
   2846   // break;
   2847   // case 2:
   2848   //  ...
   2849   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
   2850   //  ...
   2851   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
   2852   // break;
   2853   // default:;
   2854   // }
   2855   //
   2856   // if SimpleReduction is true, only the next code is generated:
   2857   //  ...
   2858   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
   2859   //  ...
   2860 
   2861   auto &C = CGM.getContext();
   2862 
   2863   if (SimpleReduction) {
            // Emit the combining expressions in place and return; array-typed items
            // are combined element by element.
   2864     CodeGenFunction::RunCleanupsScope Scope(CGF);
   2865     auto IPriv = Privates.begin();
   2866     auto ILHS = LHSExprs.begin();
   2867     auto IRHS = RHSExprs.begin();
   2868     for (auto *E : ReductionOps) {
   2869       if ((*IPriv)->getType()->isArrayType()) {
   2870         auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
   2871         auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
   2872         EmitOMPAggregateReduction(
   2873             CGF, (*IPriv)->getType(), LHSVar, RHSVar,
   2874             [=](CodeGenFunction &CGF, const Expr *, const Expr *,
   2875                 const Expr *) { CGF.EmitIgnoredExpr(E); });
   2876       } else
   2877         CGF.EmitIgnoredExpr(E);
   2878       ++IPriv, ++ILHS, ++IRHS;
   2879     }
   2880     return;
   2881   }
   2882 
   2883   // 1. Build a list of reduction variables.
   2884   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
          // The list has one slot per RHS expression plus one extra slot for every
          // array-typed private.
   2885   auto Size = RHSExprs.size();
   2886   for (auto *E : Privates) {
   2887     if (E->getType()->isArrayType())
   2888       // Reserve place for array size.
   2889       ++Size;
   2890   }
   2891   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
   2892   QualType ReductionArrayTy =
   2893       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
   2894                              /*IndexTypeQuals=*/0);
   2895   Address ReductionList =
   2896       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
   2897   auto IPriv = Privates.begin();
          // I counts reduction items, Idx counts slots of ReductionList.
   2898   unsigned Idx = 0;
   2899   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
   2900     Address Elem =
   2901       CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
   2902     CGF.Builder.CreateStore(
   2903         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
   2904             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
   2905         Elem);
   2906     if ((*IPriv)->getType()->isArrayType()) {
   2907       // Store array size.
              // The first component of getVLASize (presumably the element count) is
              // converted to a pointer-typed value so that it fits the void* slot
              // that follows the address slot; emitReductionFunction reads it back
              // with ptrtoint.
   2908       ++Idx;
   2909       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
   2910                                              CGF.getPointerSize());
   2911       CGF.Builder.CreateStore(
   2912           CGF.Builder.CreateIntToPtr(
   2913               CGF.Builder.CreateIntCast(
   2914                   CGF.getVLASize(CGF.getContext().getAsVariableArrayType(
   2915                                      (*IPriv)->getType()))
   2916                       .first,
   2917                   CGF.SizeTy, /*isSigned=*/false),
   2918               CGF.VoidPtrTy),
   2919           Elem);
   2920     }
   2921   }
   2922 
   2923   // 2. Emit reduce_func().
   2924   auto *ReductionFn = emitReductionFunction(
   2925       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
   2926       LHSExprs, RHSExprs, ReductionOps);
   2927 
   2928   // 3. Create static kmp_critical_name lock = { 0 };
   2929   auto *Lock = getCriticalRegionLock(".reduction");
   2930 
   2931   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
   2932   // RedList, reduce_func, &<lock>);
   2933   auto *IdentTLoc = emitUpdateLocation(
   2934       CGF, Loc,
   2935       static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
   2936   auto *ThreadId = getThreadID(CGF, Loc);
   2937   auto *ReductionArrayTySize = getTypeSize(CGF, ReductionArrayTy);
   2938   auto *RL =
   2939     CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
   2940                                                     CGF.VoidPtrTy);
          // NOTE(review): <n> is the number of reduction items (RHSExprs.size()),
          // while sizeof(RedList) covers all Size slots including the extra array
          // size slots -- confirm that the runtime expects the item count here.
   2941   llvm::Value *Args[] = {
   2942       IdentTLoc,                             // ident_t *<loc>
   2943       ThreadId,                              // i32 <gtid>
   2944       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
   2945       ReductionArrayTySize,                  // size_type sizeof(RedList)
   2946       RL,                                    // void *RedList
   2947       ReductionFn, // void (*) (void *, void *) <reduce_func>
   2948       Lock         // kmp_critical_name *&<lock>
   2949   };
   2950   auto Res = CGF.EmitRuntimeCall(
   2951       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
   2952                                        : OMPRTL__kmpc_reduce),
   2953       Args);
   2954 
   2955   // 5. Build switch(res)
          // Any result other than 1 or 2 goes straight to DefaultBB, where nothing
          // further is emitted by this function.
   2956   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
   2957   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
   2958 
   2959   // 6. Build case 1:
   2960   //  ...
   2961   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
   2962   //  ...
   2963   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
   2964   // break;
   2965   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
   2966   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
   2967   CGF.EmitBlock(Case1BB);
   2968 
   2969   {
   2970     CodeGenFunction::RunCleanupsScope Scope(CGF);
   2971     // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
            // The call is registered as a cleanup before the combining code is
            // emitted; presumably it is emitted when Scope is left, i.e. after the
            // combining code -- CallEndCleanup is defined outside this chunk.
   2972     llvm::Value *EndArgs[] = {
   2973         IdentTLoc, // ident_t *<loc>
   2974         ThreadId,  // i32 <gtid>
   2975         Lock       // kmp_critical_name *&<lock>
   2976     };
   2977     CGF.EHStack
   2978         .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
   2979             NormalAndEHCleanup,
   2980             createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
   2981                                              : OMPRTL__kmpc_end_reduce),
   2982             llvm::makeArrayRef(EndArgs));
   2983     auto IPriv = Privates.begin();
   2984     auto ILHS = LHSExprs.begin();
   2985     auto IRHS = RHSExprs.begin();
   2986     for (auto *E : ReductionOps) {
   2987       if ((*IPriv)->getType()->isArrayType()) {
   2988         // Emit reduction for array section.
   2989         auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
   2990         auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
   2991         EmitOMPAggregateReduction(
   2992             CGF, (*IPriv)->getType(), LHSVar, RHSVar,
   2993             [=](CodeGenFunction &CGF, const Expr *, const Expr *,
   2994                 const Expr *) { CGF.EmitIgnoredExpr(E); });
   2995       } else
   2996         // Emit reduction for array subscript or single variable.
   2997         CGF.EmitIgnoredExpr(E);
   2998       ++IPriv, ++ILHS, ++IRHS;
   2999     }
   3000   }
   3001 
   3002   CGF.EmitBranch(DefaultBB);
   3003 
   3004   // 7. Build case 2:
   3005   //  ...
   3006   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
   3007   //  ...
   3008   // break;
   3009   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
   3010   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
   3011   CGF.EmitBlock(Case2BB);
   3012 
   3013   {
   3014     CodeGenFunction::RunCleanupsScope Scope(CGF);
            // In this case the end call is registered only for the blocking form;
            // with WithNowait nothing is called at the end of case 2.
   3015     if (!WithNowait) {
   3016       // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
   3017       llvm::Value *EndArgs[] = {
   3018           IdentTLoc, // ident_t *<loc>
   3019           ThreadId,  // i32 <gtid>
   3020           Lock       // kmp_critical_name *&<lock>
   3021       };
   3022       CGF.EHStack
   3023           .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
   3024               NormalAndEHCleanup,
   3025               createRuntimeFunction(OMPRTL__kmpc_end_reduce),
   3026               llvm::makeArrayRef(EndArgs));
   3027     }
   3028     auto ILHS = LHSExprs.begin();
   3029     auto IRHS = RHSExprs.begin();
   3030     auto IPriv = Privates.begin();
   3031     for (auto *E : ReductionOps) {
                // Decompose E, when it has the form X = Up, into the pieces needed
                // for an atomic update: XExpr is the assignment target, UpExpr the
                // assigned expression, EExpr and BO the right operand and opcode
                // of the binary operator found in UpExpr.
   3032         const Expr *XExpr = nullptr;
   3033         const Expr *EExpr = nullptr;
   3034         const Expr *UpExpr = nullptr;
   3035         BinaryOperatorKind BO = BO_Comma;
                // BO_Comma is the value left in place when no binary operator is
                // recognized below. The BO declared by the next if-statement is a
                // different variable (a BinaryOperator pointer) that shadows this
                // one inside that statement only.
   3036         if (auto *BO = dyn_cast<BinaryOperator>(E)) {
   3037           if (BO->getOpcode() == BO_Assign) {
   3038             XExpr = BO->getLHS();
   3039             UpExpr = BO->getRHS();
   3040           }
   3041         }
   3042         // Try to emit update expression as a simple atomic.
   3043         auto *RHSExpr = UpExpr;
   3044         if (RHSExpr) {
   3045           // Analyze RHS part of the whole expression.
   3046           if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
   3047                   RHSExpr->IgnoreParenImpCasts())) {
   3048             // If this is a conditional operator, analyze its condition for
   3049             // min/max reduction operator.
   3050             RHSExpr = ACO->getCond();
   3051           }
   3052           if (auto *BORHS =
   3053                   dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
   3054             EExpr = BORHS->getRHS();
   3055             BO = BORHS->getOpcode();
   3056           }
   3057         }
                // An assignment was recognized: use the atomic update path.
                // Otherwise fall back to a critical region around the expression.
   3058         if (XExpr) {
   3059           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
   3060           auto &&AtomicRedGen = [this, BO, VD, IPriv,
   3061                                  Loc](CodeGenFunction &CGF, const Expr *XExpr,
   3062                                       const Expr *EExpr, const Expr *UpExpr) {
   3063             LValue X = CGF.EmitLValue(XExpr);
   3064             RValue E;
   3065             if (EExpr)
   3066               E = CGF.EmitAnyExpr(EExpr);
                    // The callback passed last evaluates UpExpr with VD remapped to
                    // a temporary that holds the value handed in as XRValue.
                    // Presumably it is used when the update cannot be emitted as a
                    // single atomic operation -- confirm against
                    // EmitOMPAtomicSimpleUpdateExpr.
   3067             CGF.EmitOMPAtomicSimpleUpdateExpr(
   3068                 X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
   3069                 [&CGF, UpExpr, VD, IPriv](RValue XRValue) {
   3070                   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
   3071                   PrivateScope.addPrivate(VD, [&CGF, VD, XRValue]() -> Address {
   3072                     Address LHSTemp = CGF.CreateMemTemp(VD->getType());
   3073                     CGF.EmitStoreThroughLValue(
   3074                         XRValue, CGF.MakeAddrLValue(LHSTemp, VD->getType()));
   3075                     return LHSTemp;
   3076                   });
   3077                   (void)PrivateScope.Privatize();
   3078                   return CGF.EmitAnyExpr(UpExpr);
   3079                 });
   3080           };
   3081           if ((*IPriv)->getType()->isArrayType()) {
   3082             // Emit atomic reduction for array section.
   3083             auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
   3084             EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
   3085                                       AtomicRedGen, XExpr, EExpr, UpExpr);
   3086           } else
   3087             // Emit atomic reduction for array subscript or single variable.
   3088             AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
   3089         } else {
   3090           // Emit as a critical region.
   3091           auto &&CritRedGen = [this, E, Loc](CodeGenFunction &CGF, const Expr *,
   3092                                              const Expr *, const Expr *) {
   3093             emitCriticalRegion(
   3094                 CGF, ".atomic_reduction",
   3095                 [E](CodeGenFunction &CGF) { CGF.EmitIgnoredExpr(E); }, Loc);
   3096           };
   3097           if ((*IPriv)->getType()->isArrayType()) {
   3098             auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
   3099             auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
   3100             EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
   3101                                       CritRedGen);
   3102           } else
   3103             CritRedGen(CGF, nullptr, nullptr, nullptr);
   3104         }
   3105       ++ILHS, ++IRHS, ++IPriv;
   3106     }
   3107   }
   3108 
   3109   CGF.EmitBranch(DefaultBB);
   3110   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
   3111 }
   3112 
   3113 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
   3114                                        SourceLocation Loc) {
   3115   if (CGF.HaveInsertPoint()) {
            // Emits kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
            // global_tid); nothing is emitted without an insertion point. The
            // returned value stays unused until untied tasks are supported.
   3116     auto *UpdateLoc = emitUpdateLocation(CGF, Loc);
   3117     auto *GTid = getThreadID(CGF, Loc);
   3118     llvm::Value *WaitArgs[] = {UpdateLoc, GTid};
   3119     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait),
   3120                         WaitArgs);
   3121   }
   3122 }
   3123 
   3124 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
   3125                                            OpenMPDirectiveKind InnerKind,
   3126                                            const RegionCodeGenTy &CodeGen,
   3127                                            bool HasCancel) {
   3128   if (!CGF.HaveInsertPoint())
   3129     return;
   3130   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
   3131   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
   3132 }
   3133 
   3134 namespace {
   3135 enum RTCancelKind {
   3136   CancelNoreq = 0,
   3137   CancelParallel = 1,
   3138   CancelLoop = 2,
   3139   CancelSections = 3,
   3140   CancelTaskgroup = 4
   3141 };
   3142 }
   3143 
   3144 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
   3145   RTCancelKind CancelKind = CancelNoreq;
   3146   if (CancelRegion == OMPD_parallel)
   3147     CancelKind = CancelParallel;
   3148   else if (CancelRegion == OMPD_for)
   3149     CancelKind = CancelLoop;
   3150   else if (CancelRegion == OMPD_sections)
   3151     CancelKind = CancelSections;
   3152   else {
   3153     assert(CancelRegion == OMPD_taskgroup);
   3154     CancelKind = CancelTaskgroup;
   3155   }
   3156   return CancelKind;
   3157 }
   3158 
   3159 void CGOpenMPRuntime::emitCancellationPointCall(
   3160     CodeGenFunction &CGF, SourceLocation Loc,
   3161     OpenMPDirectiveKind CancelRegion) {
   3162   if (!CGF.HaveInsertPoint())
   3163     return;
   3164   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
   3165   // global_tid, kmp_int32 cncl_kind);
   3166   if (auto *OMPRegionInfo =
   3167           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
   3168     if (OMPRegionInfo->getDirectiveKind() == OMPD_single)
   3169       return;
   3170     if (OMPRegionInfo->hasCancel()) {
   3171       llvm::Value *Args[] = {
   3172           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
   3173           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
   3174       // Ignore return result until untied tasks are supported.
   3175       auto *Result = CGF.EmitRuntimeCall(
   3176           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
   3177       // if (__kmpc_cancellationpoint()) {
   3178       //  __kmpc_cancel_barrier();
   3179       //   exit from construct;
   3180       // }
   3181       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
   3182       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
   3183       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
   3184       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
   3185       CGF.EmitBlock(ExitBB);
   3186       // __kmpc_cancel_barrier();
   3187       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
   3188       // exit from construct;
   3189       auto CancelDest =
   3190           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
   3191       CGF.EmitBranchThroughCleanup(CancelDest);
   3192       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
   3193     }
   3194   }
   3195 }
   3196 
   3197 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
   3198                                      const Expr *IfCond,
   3199                                      OpenMPDirectiveKind CancelRegion) {
   3200   if (!CGF.HaveInsertPoint())
   3201     return;
   3202   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
   3203   // kmp_int32 cncl_kind);
   3204   if (auto *OMPRegionInfo =
   3205           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
   3206     if (OMPRegionInfo->getDirectiveKind() == OMPD_single)
   3207       return;
   3208     auto &&ThenGen = [this, Loc, CancelRegion,
   3209                       OMPRegionInfo](CodeGenFunction &CGF) {
   3210       llvm::Value *Args[] = {
   3211           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
   3212           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
   3213       // Ignore return result until untied tasks are supported.
   3214       auto *Result =
   3215           CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
   3216       // if (__kmpc_cancel()) {
   3217       //  __kmpc_cancel_barrier();
   3218       //   exit from construct;
   3219       // }
   3220       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
   3221       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
   3222       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
   3223       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
   3224       CGF.EmitBlock(ExitBB);
   3225       // __kmpc_cancel_barrier();
   3226       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
   3227       // exit from construct;
   3228       auto CancelDest =
   3229           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
   3230       CGF.EmitBranchThroughCleanup(CancelDest);
   3231       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
   3232     };
   3233     if (IfCond)
   3234       emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {});
   3235     else
   3236       ThenGen(CGF);
   3237   }
   3238 }
   3239 
   3240 llvm::Value *
   3241 CGOpenMPRuntime::emitTargetOutlinedFunction(const OMPExecutableDirective &D,
   3242                                             const RegionCodeGenTy &CodeGen) {
   3243   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
   3244 
   3245   CodeGenFunction CGF(CGM, true);
   3246   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen);
   3247   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
   3248   return CGF.GenerateOpenMPCapturedStmtFunction(CS);
   3249 }
   3250 
   3251 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
   3252                                      const OMPExecutableDirective &D,
   3253                                      llvm::Value *OutlinedFn,
   3254                                      const Expr *IfCond, const Expr *Device,
   3255                                      ArrayRef<llvm::Value *> CapturedVars) {
   3256   if (!CGF.HaveInsertPoint())
   3257     return;
   3258   /// \brief Values for bit flags used to specify the mapping type for
   3259   /// offloading.
   3260   enum OpenMPOffloadMappingFlags {
   3261     /// \brief Allocate memory on the device and move data from host to device.
   3262     OMP_MAP_TO = 0x01,
   3263     /// \brief Allocate memory on the device and move data from device to host.
   3264     OMP_MAP_FROM = 0x02,
   3265     /// \brief The element passed to the device is a pointer.
   3266     OMP_MAP_PTR = 0x20,
   3267     /// \brief Pass the element to the device by value.
   3268     OMP_MAP_BYCOPY = 0x80,
   3269   };
   3270 
   3271   enum OpenMPOffloadingReservedDeviceIDs {
   3272     /// \brief Device ID if the device was not defined, runtime should get it
   3273     /// from environment variables in the spec.
   3274     OMP_DEVICEID_UNDEF = -1,
   3275   };
   3276 
   3277   auto &Ctx = CGF.getContext();
   3278 
   3279   // Fill up the arrays with the all the captured variables.
   3280   SmallVector<llvm::Value *, 16> BasePointers;
   3281   SmallVector<llvm::Value *, 16> Pointers;
   3282   SmallVector<llvm::Value *, 16> Sizes;
   3283   SmallVector<unsigned, 16> MapTypes;
   3284 
   3285   bool hasVLACaptures = false;
   3286 
   3287   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
   3288   auto RI = CS.getCapturedRecordDecl()->field_begin();
   3289   // auto II = CS.capture_init_begin();
   3290   auto CV = CapturedVars.begin();
   3291   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
   3292                                             CE = CS.capture_end();
   3293        CI != CE; ++CI, ++RI, ++CV) {
   3294     StringRef Name;
   3295     QualType Ty;
   3296     llvm::Value *BasePointer;
   3297     llvm::Value *Pointer;
   3298     llvm::Value *Size;
   3299     unsigned MapType;
   3300 
   3301     // VLA sizes are passed to the outlined region by copy.
   3302     if (CI->capturesVariableArrayType()) {
   3303       BasePointer = Pointer = *CV;
   3304       Size = getTypeSize(CGF, RI->getType());
   3305       // Copy to the device as an argument. No need to retrieve it.
   3306       MapType = OMP_MAP_BYCOPY;
   3307       hasVLACaptures = true;
   3308     } else if (CI->capturesThis()) {
   3309       BasePointer = Pointer = *CV;
   3310       const PointerType *PtrTy = cast<PointerType>(RI->getType().getTypePtr());
   3311       Size = getTypeSize(CGF, PtrTy->getPointeeType());
   3312       // Default map type.
   3313       MapType = OMP_MAP_TO | OMP_MAP_FROM;
   3314     } else if (CI->capturesVariableByCopy()) {
   3315       MapType = OMP_MAP_BYCOPY;
   3316       if (!RI->getType()->isAnyPointerType()) {
   3317         // If the field is not a pointer, we need to save the actual value and
   3318         // load it as a void pointer.
   3319         auto DstAddr = CGF.CreateMemTemp(
   3320             Ctx.getUIntPtrType(),
   3321             Twine(CI->getCapturedVar()->getName()) + ".casted");
   3322         LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
   3323 
   3324         auto *SrcAddrVal = CGF.EmitScalarConversion(
   3325             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
   3326             Ctx.getPointerType(RI->getType()), SourceLocation());
   3327         LValue SrcLV =
   3328             CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI->getType());
   3329 
   3330         // Store the value using the source type pointer.
   3331         CGF.EmitStoreThroughLValue(RValue::get(*CV), SrcLV);
   3332 
   3333         // Load the value using the destination type pointer.
   3334         BasePointer = Pointer =
   3335             CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
   3336       } else {
   3337         MapType |= OMP_MAP_PTR;
   3338         BasePointer = Pointer = *CV;
   3339       }
   3340       Size = getTypeSize(CGF, RI->getType());
   3341     } else {
   3342       assert(CI->capturesVariable() && "Expected captured reference.");
   3343       BasePointer = Pointer = *CV;
   3344 
   3345       const ReferenceType *PtrTy =
   3346           cast<ReferenceType>(RI->getType().getTypePtr());
   3347       QualType ElementType = PtrTy->getPointeeType();
   3348       Size = getTypeSize(CGF, ElementType);
   3349       // The default map type for a scalar/complex type is 'to' because by
   3350       // default the value doesn't have to be retrieved. For an aggregate type,
   3351       // the default is 'tofrom'.
   3352       MapType = ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM)
   3353                                                : OMP_MAP_TO;
   3354       if (ElementType->isAnyPointerType())
   3355         MapType |= OMP_MAP_PTR;
   3356     }
   3357 
   3358     BasePointers.push_back(BasePointer);
   3359     Pointers.push_back(Pointer);
   3360     Sizes.push_back(Size);
   3361     MapTypes.push_back(MapType);
   3362   }
   3363 
   3364   // Keep track on whether the host function has to be executed.
   3365   auto OffloadErrorQType =
   3366       Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
   3367   auto OffloadError = CGF.MakeAddrLValue(
   3368       CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
   3369       OffloadErrorQType);
   3370   CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
   3371                         OffloadError);
   3372 
   3373   // Fill up the pointer arrays and transfer execution to the device.
   3374   auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes,
   3375                     hasVLACaptures, Device, OffloadError,
   3376                     OffloadErrorQType](CodeGenFunction &CGF) {
   3377     unsigned PointerNumVal = BasePointers.size();
   3378     llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
   3379     llvm::Value *BasePointersArray;
   3380     llvm::Value *PointersArray;
   3381     llvm::Value *SizesArray;
   3382     llvm::Value *MapTypesArray;
   3383 
   3384     if (PointerNumVal) {
   3385       llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
   3386       QualType PointerArrayType = Ctx.getConstantArrayType(
   3387           Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
   3388           /*IndexTypeQuals=*/0);
   3389 
   3390       BasePointersArray =
   3391           CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
   3392       PointersArray =
   3393           CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
   3394 
   3395       // If we don't have any VLA types, we can use a constant array for the map
   3396       // sizes, otherwise we need to fill up the arrays as we do for the
   3397       // pointers.
   3398       if (hasVLACaptures) {
   3399         QualType SizeArrayType = Ctx.getConstantArrayType(
   3400             Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
   3401             /*IndexTypeQuals=*/0);
   3402         SizesArray =
   3403             CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
   3404       } else {
   3405         // We expect all the sizes to be constant, so we collect them to create
   3406         // a constant array.
   3407         SmallVector<llvm::Constant *, 16> ConstSizes;
   3408         for (auto S : Sizes)
   3409           ConstSizes.push_back(cast<llvm::Constant>(S));
   3410 
   3411         auto *SizesArrayInit = llvm::ConstantArray::get(
   3412             llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
   3413         auto *SizesArrayGbl = new llvm::GlobalVariable(
   3414             CGM.getModule(), SizesArrayInit->getType(),
   3415             /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
   3416             SizesArrayInit, ".offload_sizes");
   3417         SizesArrayGbl->setUnnamedAddr(true);
   3418         SizesArray = SizesArrayGbl;
   3419       }
   3420 
   3421       // The map types are always constant so we don't need to generate code to
   3422       // fill arrays. Instead, we create an array constant.
   3423       llvm::Constant *MapTypesArrayInit =
   3424           llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
   3425       auto *MapTypesArrayGbl = new llvm::GlobalVariable(
   3426           CGM.getModule(), MapTypesArrayInit->getType(),
   3427           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
   3428           MapTypesArrayInit, ".offload_maptypes");
   3429       MapTypesArrayGbl->setUnnamedAddr(true);
   3430       MapTypesArray = MapTypesArrayGbl;
   3431 
   3432       for (unsigned i = 0; i < PointerNumVal; ++i) {
   3433 
   3434         llvm::Value *BPVal = BasePointers[i];
   3435         if (BPVal->getType()->isPointerTy())
   3436           BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
   3437         else {
   3438           assert(BPVal->getType()->isIntegerTy() &&
   3439                  "If not a pointer, the value type must be an integer.");
   3440           BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
   3441         }
   3442         llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
   3443             llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal),
   3444             BasePointersArray, 0, i);
   3445         Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
   3446         CGF.Builder.CreateStore(BPVal, BPAddr);
   3447 
   3448         llvm::Value *PVal = Pointers[i];
   3449         if (PVal->getType()->isPointerTy())
   3450           PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
   3451         else {
   3452           assert(PVal->getType()->isIntegerTy() &&
   3453                  "If not a pointer, the value type must be an integer.");
   3454           PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
   3455         }
   3456         llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
   3457             llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
   3458             0, i);
   3459         Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
   3460         CGF.Builder.CreateStore(PVal, PAddr);
   3461 
   3462         if (hasVLACaptures) {
   3463           llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
   3464               llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
   3465               /*Idx0=*/0,
   3466               /*Idx1=*/i);
   3467           Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
   3468           CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(
   3469                                       Sizes[i], CGM.SizeTy, /*isSigned=*/true),
   3470                                   SAddr);
   3471         }
   3472       }
   3473 
   3474       BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
   3475           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
   3476           /*Idx0=*/0, /*Idx1=*/0);
   3477       PointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
   3478           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
   3479           /*Idx0=*/0,
   3480           /*Idx1=*/0);
   3481       SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
   3482           llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
   3483           /*Idx0=*/0, /*Idx1=*/0);
   3484       MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
   3485           llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray,
   3486           /*Idx0=*/0,
   3487           /*Idx1=*/0);
   3488 
   3489     } else {
   3490       BasePointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
   3491       PointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
   3492       SizesArray = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
   3493       MapTypesArray =
   3494           llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
   3495     }
   3496 
   3497     // On top of the arrays that were filled up, the target offloading call
   3498     // takes as arguments the device id as well as the host pointer. The host
   3499     // pointer is used by the runtime library to identify the current target
   3500     // region, so it only has to be unique and not necessarily point to
   3501     // anything. It could be the pointer to the outlined function that
   3502     // implements the target region, but we aren't using that so that the
   3503     // compiler doesn't need to keep that, and could therefore inline the host
   3504     // function if proven worthwhile during optimization.
   3505 
   3506     llvm::Value *HostPtr = new llvm::GlobalVariable(
   3507         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
   3508         llvm::GlobalValue::PrivateLinkage,
   3509         llvm::Constant::getNullValue(CGM.Int8Ty), ".offload_hstptr");
   3510 
   3511     // Emit device ID if any.
   3512     llvm::Value *DeviceID;
   3513     if (Device)
   3514       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
   3515                                            CGM.Int32Ty, /*isSigned=*/true);
   3516     else
   3517       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
   3518 
   3519     llvm::Value *OffloadingArgs[] = {
   3520         DeviceID,      HostPtr,    PointerNum,   BasePointersArray,
   3521         PointersArray, SizesArray, MapTypesArray};
   3522     auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
   3523                                       OffloadingArgs);
   3524 
   3525     CGF.EmitStoreOfScalar(Return, OffloadError);
   3526   };
   3527 
   3528   if (IfCond) {
   3529     // Notify that the host version must be executed.
   3530     auto &&ElseGen = [this, OffloadError,
   3531                       OffloadErrorQType](CodeGenFunction &CGF) {
   3532       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u),
   3533                             OffloadError);
   3534     };
   3535     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
   3536   } else {
   3537     CodeGenFunction::RunCleanupsScope Scope(CGF);
   3538     ThenGen(CGF);
   3539   }
   3540 
   3541   // Check the error code and execute the host version if required.
   3542   auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
   3543   auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
   3544   auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
   3545   auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
   3546   CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
   3547 
   3548   CGF.EmitBlock(OffloadFailedBlock);
   3549   CGF.Builder.CreateCall(OutlinedFn, BasePointers);
   3550   CGF.EmitBranch(OffloadContBlock);
   3551 
   3552   CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
   3553   return;
   3554 }
   3555