      1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This contains code to emit OpenMP nodes as LLVM code.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "CGOpenMPRuntime.h"
     15 #include "CodeGenFunction.h"
     16 #include "CodeGenModule.h"
     17 #include "TargetInfo.h"
     18 #include "clang/AST/Stmt.h"
     19 #include "clang/AST/StmtOpenMP.h"
     20 using namespace clang;
     21 using namespace CodeGen;
     22 
     23 void CodeGenFunction::GenerateOpenMPCapturedVars(
     24     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
     25   const RecordDecl *RD = S.getCapturedRecordDecl();
     26   auto CurField = RD->field_begin();
     27   auto CurCap = S.captures().begin();
     28   for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
     29                                                  E = S.capture_init_end();
     30        I != E; ++I, ++CurField, ++CurCap) {
     31     if (CurField->hasCapturedVLAType()) {
     32       auto VAT = CurField->getCapturedVLAType();
     33       auto *Val = VLASizeMap[VAT->getSizeExpr()];
     34       CapturedVars.push_back(Val);
     35     } else if (CurCap->capturesThis())
     36       CapturedVars.push_back(CXXThisValue);
     37     else if (CurCap->capturesVariableByCopy())
     38       CapturedVars.push_back(
     39           EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal());
     40     else {
     41       assert(CurCap->capturesVariable() && "Expected capture by reference.");
     42       CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
     43     }
     44   }
     45 }
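
         // For illustration, consider a hypothetical captured region such as:
         //
         //   int n = ...;
         //   int a[n];        // VLA: its size expression is captured
         //   int x = 0;       // scalar, captured by copy via firstprivate
         //   S &r = ...;      // captured by reference
         //   #pragma omp parallel firstprivate(x) shared(r)
         //   { /* uses a, x, r, and possibly 'this' */ }
         //
         // The loop above then collects one value per capture: the VLA size value
         // for 'a', the 'this' pointer if 'this' is captured, the loaded value of
         // 'x' (capture by copy), and the address of 'r' (capture by reference).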
     46 
     47 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
     48                                     StringRef Name, LValue AddrLV,
     49                                     bool isReferenceType = false) {
     50   ASTContext &Ctx = CGF.getContext();
     51 
     52   auto *CastedPtr = CGF.EmitScalarConversion(
     53       AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
     54       Ctx.getPointerType(DstType), SourceLocation());
     55   auto TmpAddr =
     56       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
     57           .getAddress();
     58 
      59   // If we are dealing with references, we need to return the address of the
      60   // reference rather than the address of the underlying value.
     61   if (isReferenceType) {
     62     QualType RefType = Ctx.getLValueReferenceType(DstType);
     63     auto *RefVal = TmpAddr.getPointer();
     64     TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
     65     auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
     66     CGF.EmitScalarInit(RefVal, TmpLVal);
     67   }
     68 
     69   return TmpAddr;
     70 }
     71 
     72 llvm::Function *
     73 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
     74   assert(
     75       CapturedStmtInfo &&
     76       "CapturedStmtInfo should be set when generating the captured function");
     77   const CapturedDecl *CD = S.getCapturedDecl();
     78   const RecordDecl *RD = S.getCapturedRecordDecl();
     79   assert(CD->hasBody() && "missing CapturedDecl body");
     80 
     81   // Build the argument list.
     82   ASTContext &Ctx = CGM.getContext();
     83   FunctionArgList Args;
     84   Args.append(CD->param_begin(),
     85               std::next(CD->param_begin(), CD->getContextParamPosition()));
     86   auto I = S.captures().begin();
     87   for (auto *FD : RD->fields()) {
     88     QualType ArgType = FD->getType();
     89     IdentifierInfo *II = nullptr;
     90     VarDecl *CapVar = nullptr;
     91 
      92     // If this is a capture by copy and the type is not a pointer, the outlined
      93     // function argument type should be uintptr and the value properly cast to
      94     // uintptr. This is necessary because the runtime library can only deal
      95     // with pointers. The VLA type sizes are passed to the outlined function in
      96     // the same way.
     97     if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
     98         I->capturesVariableArrayType())
     99       ArgType = Ctx.getUIntPtrType();
    100 
    101     if (I->capturesVariable() || I->capturesVariableByCopy()) {
    102       CapVar = I->getCapturedVar();
    103       II = CapVar->getIdentifier();
    104     } else if (I->capturesThis())
    105       II = &getContext().Idents.get("this");
    106     else {
    107       assert(I->capturesVariableArrayType());
    108       II = &getContext().Idents.get("vla");
    109     }
    110     if (ArgType->isVariablyModifiedType())
    111       ArgType = getContext().getVariableArrayDecayedType(ArgType);
    112     Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
    113                                              FD->getLocation(), II, ArgType));
    114     ++I;
    115   }
    116   Args.append(
    117       std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
    118       CD->param_end());
    119 
    120   // Create the function declaration.
    121   FunctionType::ExtInfo ExtInfo;
    122   const CGFunctionInfo &FuncInfo =
    123       CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo,
    124                                                     /*IsVariadic=*/false);
    125   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
    126 
    127   llvm::Function *F = llvm::Function::Create(
    128       FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
    129       CapturedStmtInfo->getHelperName(), &CGM.getModule());
    130   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
    131   if (CD->isNothrow())
    132     F->addFnAttr(llvm::Attribute::NoUnwind);
    133 
    134   // Generate the function.
    135   StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
    136                 CD->getBody()->getLocStart());
    137   unsigned Cnt = CD->getContextParamPosition();
    138   I = S.captures().begin();
    139   for (auto *FD : RD->fields()) {
     140     // If we are capturing a pointer by copy, we don't need to do anything; just
     141     // use the value that we get from the arguments.
    142     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
    143       setAddrOfLocalVar(I->getCapturedVar(), GetAddrOfLocalVar(Args[Cnt]));
    144       ++Cnt, ++I;
    145       continue;
    146     }
    147 
    148     LValue ArgLVal =
    149         MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
    150                        AlignmentSource::Decl);
    151     if (FD->hasCapturedVLAType()) {
    152       LValue CastedArgLVal =
    153           MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
    154                                               Args[Cnt]->getName(), ArgLVal),
    155                          FD->getType(), AlignmentSource::Decl);
    156       auto *ExprArg =
    157           EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
    158       auto VAT = FD->getCapturedVLAType();
    159       VLASizeMap[VAT->getSizeExpr()] = ExprArg;
    160     } else if (I->capturesVariable()) {
    161       auto *Var = I->getCapturedVar();
    162       QualType VarTy = Var->getType();
    163       Address ArgAddr = ArgLVal.getAddress();
    164       if (!VarTy->isReferenceType()) {
    165         ArgAddr = EmitLoadOfReference(
    166             ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
    167       }
    168       setAddrOfLocalVar(
    169           Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
    170     } else if (I->capturesVariableByCopy()) {
    171       assert(!FD->getType()->isAnyPointerType() &&
    172              "Not expecting a captured pointer.");
    173       auto *Var = I->getCapturedVar();
    174       QualType VarTy = Var->getType();
    175       setAddrOfLocalVar(I->getCapturedVar(),
    176                         castValueFromUintptr(*this, FD->getType(),
    177                                              Args[Cnt]->getName(), ArgLVal,
    178                                              VarTy->isReferenceType()));
    179     } else {
    180       // If 'this' is captured, load it into CXXThisValue.
    181       assert(I->capturesThis());
    182       CXXThisValue =
    183           EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
    184     }
    185     ++Cnt, ++I;
    186   }
    187 
    188   PGO.assignRegionCounters(GlobalDecl(CD), F);
    189   CapturedStmtInfo->EmitBody(*this, CD->getBody());
    190   FinishFunction(CD->getBodyRBrace());
    191 
    192   return F;
    193 }
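
         // As a sketch of the result (assuming the hypothetical captures above),
         // the outlined helper produced here is an internal void function whose
         // parameters mirror the fields of the captured record: pointer and
         // by-reference captures are passed as pointers, while non-pointer by-copy
         // captures and VLA sizes are passed as uintptr values and converted back
         // in the prologue, e.g. on a 64-bit target something like:
         //
         //   define internal void @.omp_outlined.(..., i64 %vla, i64 %x,
         //                                        %struct.S* %r)
         //
         // Parameter names, order, and the uintptr width depend on the target and
         // on the captured record layout; the signature above is only illustrative.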
    194 
    195 //===----------------------------------------------------------------------===//
    196 //                              OpenMP Directive Emission
    197 //===----------------------------------------------------------------------===//
    198 void CodeGenFunction::EmitOMPAggregateAssign(
    199     Address DestAddr, Address SrcAddr, QualType OriginalType,
    200     const llvm::function_ref<void(Address, Address)> &CopyGen) {
    201   // Perform element-by-element initialization.
    202   QualType ElementTy;
    203 
    204   // Drill down to the base element type on both arrays.
    205   auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
    206   auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
    207   SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
    208 
    209   auto SrcBegin = SrcAddr.getPointer();
    210   auto DestBegin = DestAddr.getPointer();
     211   // Compute the end address (one past the last destination element).
    212   auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
    213   // The basic structure here is a while-do loop.
    214   auto BodyBB = createBasicBlock("omp.arraycpy.body");
    215   auto DoneBB = createBasicBlock("omp.arraycpy.done");
    216   auto IsEmpty =
    217       Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
    218   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
    219 
    220   // Enter the loop body, making that address the current address.
    221   auto EntryBB = Builder.GetInsertBlock();
    222   EmitBlock(BodyBB);
    223 
    224   CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
    225 
    226   llvm::PHINode *SrcElementPHI =
    227     Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
    228   SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    229   Address SrcElementCurrent =
    230       Address(SrcElementPHI,
    231               SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
    232 
    233   llvm::PHINode *DestElementPHI =
    234     Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
    235   DestElementPHI->addIncoming(DestBegin, EntryBB);
    236   Address DestElementCurrent =
    237     Address(DestElementPHI,
    238             DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
    239 
    240   // Emit copy.
    241   CopyGen(DestElementCurrent, SrcElementCurrent);
    242 
    243   // Shift the address forward by one element.
    244   auto DestElementNext = Builder.CreateConstGEP1_32(
    245       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    246   auto SrcElementNext = Builder.CreateConstGEP1_32(
    247       SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
    248   // Check whether we've reached the end.
    249   auto Done =
    250       Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
    251   Builder.CreateCondBr(Done, DoneBB, BodyBB);
    252   DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
    253   SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
    254 
    255   // Done.
    256   EmitBlock(DoneBB, /*IsFinished=*/true);
    257 }
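
         // The emitted control flow is, schematically (block names as created
         // above):
         //
         //   entry:              br (DestBegin == DestEnd),
         //                          omp.arraycpy.done, omp.arraycpy.body
         //   omp.arraycpy.body:  phi for the current src/dest element,
         //                       CopyGen(dest, src),
         //                       advance both pointers by one element,
         //                       br (next == DestEnd),
         //                          omp.arraycpy.done, omp.arraycpy.body
         //   omp.arraycpy.done:  continue here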
    258 
    259 /// \brief Emit initialization of arrays of complex types.
    260 /// \param DestAddr Address of the array.
    261 /// \param Type Type of array.
    262 /// \param Init Initial expression of array.
    263 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
    264                                  QualType Type, const Expr *Init) {
    265   // Perform element-by-element initialization.
    266   QualType ElementTy;
    267 
    268   // Drill down to the base element type on both arrays.
    269   auto ArrayTy = Type->getAsArrayTypeUnsafe();
    270   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
    271   DestAddr =
    272       CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
    273 
    274   auto DestBegin = DestAddr.getPointer();
     275   // Compute the end address (one past the last destination element).
    276   auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
    277   // The basic structure here is a while-do loop.
    278   auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
    279   auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
    280   auto IsEmpty =
    281       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
    282   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
    283 
    284   // Enter the loop body, making that address the current address.
    285   auto EntryBB = CGF.Builder.GetInsertBlock();
    286   CGF.EmitBlock(BodyBB);
    287 
    288   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
    289 
    290   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
    291       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
    292   DestElementPHI->addIncoming(DestBegin, EntryBB);
    293   Address DestElementCurrent =
    294       Address(DestElementPHI,
    295               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
    296 
    297   // Emit copy.
    298   {
    299     CodeGenFunction::RunCleanupsScope InitScope(CGF);
    300     CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
    301                          /*IsInitializer=*/false);
    302   }
    303 
    304   // Shift the address forward by one element.
    305   auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
    306       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    307   // Check whether we've reached the end.
    308   auto Done =
    309       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
    310   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
    311   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
    312 
    313   // Done.
    314   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    315 }
    316 
    317 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
    318                                   Address SrcAddr, const VarDecl *DestVD,
    319                                   const VarDecl *SrcVD, const Expr *Copy) {
    320   if (OriginalType->isArrayType()) {
    321     auto *BO = dyn_cast<BinaryOperator>(Copy);
    322     if (BO && BO->getOpcode() == BO_Assign) {
     323       // Perform a simple memcpy for a plain assignment.
    324       EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    325     } else {
     326       // For arrays with complex element types, perform element-by-element
     327       // copying.
    328       EmitOMPAggregateAssign(
    329           DestAddr, SrcAddr, OriginalType,
    330           [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
    331             // Working with the single array element, so have to remap
    332             // destination and source variables to corresponding array
    333             // elements.
    334             CodeGenFunction::OMPPrivateScope Remap(*this);
    335             Remap.addPrivate(DestVD, [DestElement]() -> Address {
    336               return DestElement;
    337             });
    338             Remap.addPrivate(
    339                 SrcVD, [SrcElement]() -> Address { return SrcElement; });
    340             (void)Remap.Privatize();
    341             EmitIgnoredExpr(Copy);
    342           });
    343     }
    344   } else {
    345     // Remap pseudo source variable to private copy.
    346     CodeGenFunction::OMPPrivateScope Remap(*this);
    347     Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    348     Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    349     (void)Remap.Privatize();
    350     // Emit copying of the whole variable.
    351     EmitIgnoredExpr(Copy);
    352   }
    353 }
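
         // For example (schematically), for a copied variable of class type the
         // frontend provides a Copy expression of the form 'dst = src', where
         // 'dst' and 'src' are pseudo variables (DestVD/SrcVD). The remapping above
         // makes those pseudo variables resolve to DestAddr/SrcAddr (or to the
         // current array element in the element-by-element case) before the
         // expression is emitted, so the copy may invoke a user-defined copy
         // assignment operator.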
    354 
    355 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
    356                                                 OMPPrivateScope &PrivateScope) {
    357   if (!HaveInsertPoint())
    358     return false;
    359   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
    360   for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    361     auto IRef = C->varlist_begin();
    362     auto InitsRef = C->inits().begin();
    363     for (auto IInit : C->private_copies()) {
    364       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
    365       if (EmittedAsFirstprivate.count(OrigVD) == 0) {
    366         EmittedAsFirstprivate.insert(OrigVD);
    367         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
    368         auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
    369         bool IsRegistered;
    370         DeclRefExpr DRE(
    371             const_cast<VarDecl *>(OrigVD),
    372             /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
    373                 OrigVD) != nullptr,
    374             (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
    375         Address OriginalAddr = EmitLValue(&DRE).getAddress();
    376         QualType Type = OrigVD->getType();
    377         if (Type->isArrayType()) {
    378           // Emit VarDecl with copy init for arrays.
     379           // Get the address of the original variable captured in the current
     380           // captured region.
    381           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
    382             auto Emission = EmitAutoVarAlloca(*VD);
    383             auto *Init = VD->getInit();
    384             if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
    385               // Perform simple memcpy.
    386               EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
    387                                   Type);
    388             } else {
    389               EmitOMPAggregateAssign(
    390                   Emission.getAllocatedAddress(), OriginalAddr, Type,
    391                   [this, VDInit, Init](Address DestElement,
    392                                        Address SrcElement) {
    393                     // Clean up any temporaries needed by the initialization.
    394                     RunCleanupsScope InitScope(*this);
    395                     // Emit initialization for single element.
    396                     setAddrOfLocalVar(VDInit, SrcElement);
    397                     EmitAnyExprToMem(Init, DestElement,
    398                                      Init->getType().getQualifiers(),
    399                                      /*IsInitializer*/ false);
    400                     LocalDeclMap.erase(VDInit);
    401                   });
    402             }
    403             EmitAutoVarCleanups(Emission);
    404             return Emission.getAllocatedAddress();
    405           });
    406         } else {
    407           IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
    408             // Emit private VarDecl with copy init.
     409             // Remap the temporary VDInit variable to the address of the
     410             // original variable (for proper handling of captured global
     411             // variables).
    412             setAddrOfLocalVar(VDInit, OriginalAddr);
    413             EmitDecl(*VD);
    414             LocalDeclMap.erase(VDInit);
    415             return GetAddrOfLocalVar(VD);
    416           });
    417         }
    418         assert(IsRegistered &&
    419                "firstprivate var already registered as private");
    420         // Silence the warning about unused variable.
    421         (void)IsRegistered;
    422       }
    423       ++IRef, ++InitsRef;
    424     }
    425   }
    426   return !EmittedAsFirstprivate.empty();
    427 }
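
         // Illustrative usage this routine handles (hypothetical user code):
         //
         //   int x = 1;
         //   S arr[10];
         //   #pragma omp parallel firstprivate(x, arr)
         //   { /* ... */ }
         //
         // 'x' gets a private copy initialized from the original value; 'arr' is
         // either memcpy'd (trivial copy) or copied element by element via its copy
         // constructor, as selected above.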
    428 
    429 void CodeGenFunction::EmitOMPPrivateClause(
    430     const OMPExecutableDirective &D,
    431     CodeGenFunction::OMPPrivateScope &PrivateScope) {
    432   if (!HaveInsertPoint())
    433     return;
    434   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
    435   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    436     auto IRef = C->varlist_begin();
    437     for (auto IInit : C->private_copies()) {
    438       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
    439       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
    440         auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
    441         bool IsRegistered =
    442             PrivateScope.addPrivate(OrigVD, [&]() -> Address {
    443               // Emit private VarDecl with copy init.
    444               EmitDecl(*VD);
    445               return GetAddrOfLocalVar(VD);
    446             });
    447         assert(IsRegistered && "private var already registered as private");
    448         // Silence the warning about unused variable.
    449         (void)IsRegistered;
    450       }
    451       ++IRef;
    452     }
    453   }
    454 }
    455 
    456 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
    457   if (!HaveInsertPoint())
    458     return false;
    459   // threadprivate_var1 = master_threadprivate_var1;
    460   // operator=(threadprivate_var2, master_threadprivate_var2);
    461   // ...
    462   // __kmpc_barrier(&loc, global_tid);
    463   llvm::DenseSet<const VarDecl *> CopiedVars;
    464   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
    465   for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    466     auto IRef = C->varlist_begin();
    467     auto ISrcRef = C->source_exprs().begin();
    468     auto IDestRef = C->destination_exprs().begin();
    469     for (auto *AssignOp : C->assignment_ops()) {
    470       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
    471       QualType Type = VD->getType();
    472       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
    473 
    474         // Get the address of the master variable. If we are emitting code with
     475         // TLS support, the address is passed from the master as a field in the
    476         // captured declaration.
    477         Address MasterAddr = Address::invalid();
    478         if (getLangOpts().OpenMPUseTLS &&
    479             getContext().getTargetInfo().isTLSSupported()) {
    480           assert(CapturedStmtInfo->lookup(VD) &&
    481                  "Copyin threadprivates should have been captured!");
    482           DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
    483                           VK_LValue, (*IRef)->getExprLoc());
    484           MasterAddr = EmitLValue(&DRE).getAddress();
    485           LocalDeclMap.erase(VD);
    486         } else {
    487           MasterAddr =
    488             Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
    489                                         : CGM.GetAddrOfGlobal(VD),
    490                     getContext().getDeclAlign(VD));
    491         }
    492         // Get the address of the threadprivate variable.
    493         Address PrivateAddr = EmitLValue(*IRef).getAddress();
    494         if (CopiedVars.size() == 1) {
     495           // First check whether the current thread is the master thread. If it
     496           // is, there is no need to copy the data.
    497           CopyBegin = createBasicBlock("copyin.not.master");
    498           CopyEnd = createBasicBlock("copyin.not.master.end");
    499           Builder.CreateCondBr(
    500               Builder.CreateICmpNE(
    501                   Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
    502                   Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
    503               CopyBegin, CopyEnd);
    504           EmitBlock(CopyBegin);
    505         }
    506         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
    507         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
    508         EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
    509       }
    510       ++IRef;
    511       ++ISrcRef;
    512       ++IDestRef;
    513     }
    514   }
    515   if (CopyEnd) {
    516     // Exit out of copying procedure for non-master thread.
    517     EmitBlock(CopyEnd, /*IsFinished=*/true);
    518     return true;
    519   }
    520   return false;
    521 }
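
         // A hypothetical example that reaches this code:
         //
         //   static int counter;
         //   #pragma omp threadprivate(counter)
         //   ...
         //   #pragma omp parallel copyin(counter)
         //   { /* ... */ }
         //
         // Every non-master thread copies the master's 'counter' into its own
         // threadprivate instance; the master thread skips the copy because its
         // private address compares equal to the master address.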
    522 
    523 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    524     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
    525   if (!HaveInsertPoint())
    526     return false;
    527   bool HasAtLeastOneLastprivate = false;
    528   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
    529   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    530     HasAtLeastOneLastprivate = true;
    531     auto IRef = C->varlist_begin();
    532     auto IDestRef = C->destination_exprs().begin();
    533     for (auto *IInit : C->private_copies()) {
    534       // Keep the address of the original variable for future update at the end
    535       // of the loop.
    536       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
    537       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
    538         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
    539         PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
    540           DeclRefExpr DRE(
    541               const_cast<VarDecl *>(OrigVD),
    542               /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
    543                   OrigVD) != nullptr,
    544               (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
    545           return EmitLValue(&DRE).getAddress();
    546         });
     547         // Check if the variable is also a firstprivate: in this case IInit is
     548         // not generated. Initialization of this variable will happen in the
     549         // codegen for the 'firstprivate' clause.
    550         if (IInit) {
    551           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
    552           bool IsRegistered =
    553               PrivateScope.addPrivate(OrigVD, [&]() -> Address {
    554                 // Emit private VarDecl with copy init.
    555                 EmitDecl(*VD);
    556                 return GetAddrOfLocalVar(VD);
    557               });
    558           assert(IsRegistered &&
    559                  "lastprivate var already registered as private");
    560           (void)IsRegistered;
    561         }
    562       }
    563       ++IRef, ++IDestRef;
    564     }
    565   }
    566   return HasAtLeastOneLastprivate;
    567 }
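
         // For example (hypothetical):
         //
         //   int last;
         //   #pragma omp for lastprivate(last)
         //   for (int i = 0; i < n; ++i) last = f(i);
         //
         // A private copy of 'last' is registered (unless the variable is also
         // firstprivate), and the address of the original 'last' is remembered via
         // the pseudo destination variable so that the final value can be copied
         // back in EmitOMPLastprivateClauseFinal.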
    568 
    569 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    570     const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) {
    571   if (!HaveInsertPoint())
    572     return;
    573   // Emit following code:
    574   // if (<IsLastIterCond>) {
    575   //   orig_var1 = private_orig_var1;
    576   //   ...
    577   //   orig_varn = private_orig_varn;
    578   // }
    579   llvm::BasicBlock *ThenBB = nullptr;
    580   llvm::BasicBlock *DoneBB = nullptr;
    581   if (IsLastIterCond) {
    582     ThenBB = createBasicBlock(".omp.lastprivate.then");
    583     DoneBB = createBasicBlock(".omp.lastprivate.done");
    584     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    585     EmitBlock(ThenBB);
    586   }
    587   llvm::DenseMap<const Decl *, const Expr *> LoopCountersAndUpdates;
    588   const Expr *LastIterVal = nullptr;
    589   const Expr *IVExpr = nullptr;
    590   const Expr *IncExpr = nullptr;
    591   if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    592     if (isOpenMPWorksharingDirective(D.getDirectiveKind())) {
    593       LastIterVal = cast<VarDecl>(cast<DeclRefExpr>(
    594                                       LoopDirective->getUpperBoundVariable())
    595                                       ->getDecl())
    596                         ->getAnyInitializer();
    597       IVExpr = LoopDirective->getIterationVariable();
    598       IncExpr = LoopDirective->getInc();
    599       auto IUpdate = LoopDirective->updates().begin();
    600       for (auto *E : LoopDirective->counters()) {
    601         auto *D = cast<DeclRefExpr>(E)->getDecl()->getCanonicalDecl();
    602         LoopCountersAndUpdates[D] = *IUpdate;
    603         ++IUpdate;
    604       }
    605     }
    606   }
    607   {
    608     llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
    609     bool FirstLCV = true;
    610     for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    611       auto IRef = C->varlist_begin();
    612       auto ISrcRef = C->source_exprs().begin();
    613       auto IDestRef = C->destination_exprs().begin();
    614       for (auto *AssignOp : C->assignment_ops()) {
    615         auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
    616         QualType Type = PrivateVD->getType();
    617         auto *CanonicalVD = PrivateVD->getCanonicalDecl();
    618         if (AlreadyEmittedVars.insert(CanonicalVD).second) {
     619           // If the lastprivate variable is a loop control variable of a
     620           // loop-based directive, update its value before copying it back to
     621           // the original variable.
    622           if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) {
    623             if (FirstLCV && LastIterVal) {
    624               EmitAnyExprToMem(LastIterVal, EmitLValue(IVExpr).getAddress(),
    625                                IVExpr->getType().getQualifiers(),
    626                                /*IsInitializer=*/false);
    627               EmitIgnoredExpr(IncExpr);
    628               FirstLCV = false;
    629             }
    630             EmitIgnoredExpr(UpExpr);
    631           }
    632           auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
    633           auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
    634           // Get the address of the original variable.
    635           Address OriginalAddr = GetAddrOfLocalVar(DestVD);
    636           // Get the address of the private variable.
    637           Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
    638           if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
    639             PrivateAddr =
    640               Address(Builder.CreateLoad(PrivateAddr),
    641                       getNaturalTypeAlignment(RefTy->getPointeeType()));
    642           EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
    643         }
    644         ++IRef;
    645         ++ISrcRef;
    646         ++IDestRef;
    647       }
    648     }
    649   }
    650   if (IsLastIterCond) {
    651     EmitBlock(DoneBB, /*IsFinished=*/true);
    652   }
    653 }
    654 
    655 void CodeGenFunction::EmitOMPReductionClauseInit(
    656     const OMPExecutableDirective &D,
    657     CodeGenFunction::OMPPrivateScope &PrivateScope) {
    658   if (!HaveInsertPoint())
    659     return;
    660   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    661     auto ILHS = C->lhs_exprs().begin();
    662     auto IRHS = C->rhs_exprs().begin();
    663     auto IPriv = C->privates().begin();
    664     for (auto IRef : C->varlists()) {
    665       auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    666       auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    667       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    668       if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
    669         auto *Base = OASE->getBase()->IgnoreParenImpCasts();
    670         while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
    671           Base = TempOASE->getBase()->IgnoreParenImpCasts();
    672         while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
    673           Base = TempASE->getBase()->IgnoreParenImpCasts();
    674         auto *DE = cast<DeclRefExpr>(Base);
    675         auto *OrigVD = cast<VarDecl>(DE->getDecl());
    676         auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
    677         auto OASELValueUB =
    678             EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
    679         auto OriginalBaseLValue = EmitLValue(DE);
    680         auto BaseLValue = OriginalBaseLValue;
    681         auto *Zero = Builder.getInt64(/*C=*/0);
    682         llvm::SmallVector<llvm::Value *, 4> Indexes;
    683         Indexes.push_back(Zero);
    684         auto *ItemTy =
    685             OASELValueLB.getPointer()->getType()->getPointerElementType();
    686         auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType();
    687         while (Ty != ItemTy) {
    688           Indexes.push_back(Zero);
    689           Ty = Ty->getPointerElementType();
    690         }
    691         BaseLValue = MakeAddrLValue(
    692             Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes),
    693                     OASELValueLB.getAlignment()),
    694             OASELValueLB.getType(), OASELValueLB.getAlignmentSource());
    695         // Store the address of the original variable associated with the LHS
    696         // implicit variable.
    697         PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
    698           return OASELValueLB.getAddress();
    699         });
    700         // Emit reduction copy.
    701         bool IsRegistered = PrivateScope.addPrivate(
    702             OrigVD, [this, PrivateVD, BaseLValue, OASELValueLB, OASELValueUB,
    703                      OriginalBaseLValue]() -> Address {
    704               // Emit VarDecl with copy init for arrays.
     705               // Get the address of the original variable captured in the
     706               // current captured region.
    707               auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
    708                                                  OASELValueLB.getPointer());
    709               Size = Builder.CreateNUWAdd(
    710                   Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    711               CodeGenFunction::OpaqueValueMapping OpaqueMap(
    712                   *this, cast<OpaqueValueExpr>(
    713                              getContext()
    714                                  .getAsVariableArrayType(PrivateVD->getType())
    715                                  ->getSizeExpr()),
    716                   RValue::get(Size));
    717               EmitVariablyModifiedType(PrivateVD->getType());
    718               auto Emission = EmitAutoVarAlloca(*PrivateVD);
    719               auto Addr = Emission.getAllocatedAddress();
    720               auto *Init = PrivateVD->getInit();
    721               EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init);
    722               EmitAutoVarCleanups(Emission);
    723               // Emit private VarDecl with reduction init.
    724               auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
    725                                                    OASELValueLB.getPointer());
    726               auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
    727               Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
    728                   Ptr, OriginalBaseLValue.getPointer()->getType());
    729               return Address(Ptr, OriginalBaseLValue.getAlignment());
    730             });
    731         assert(IsRegistered && "private var already registered as private");
    732         // Silence the warning about unused variable.
    733         (void)IsRegistered;
    734         PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
    735           return GetAddrOfLocalVar(PrivateVD);
    736         });
    737       } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
    738         auto *Base = ASE->getBase()->IgnoreParenImpCasts();
    739         while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
    740           Base = TempASE->getBase()->IgnoreParenImpCasts();
    741         auto *DE = cast<DeclRefExpr>(Base);
    742         auto *OrigVD = cast<VarDecl>(DE->getDecl());
    743         auto ASELValue = EmitLValue(ASE);
    744         auto OriginalBaseLValue = EmitLValue(DE);
    745         auto BaseLValue = OriginalBaseLValue;
    746         auto *Zero = Builder.getInt64(/*C=*/0);
    747         llvm::SmallVector<llvm::Value *, 4> Indexes;
    748         Indexes.push_back(Zero);
    749         auto *ItemTy =
    750             ASELValue.getPointer()->getType()->getPointerElementType();
    751         auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType();
    752         while (Ty != ItemTy) {
    753           Indexes.push_back(Zero);
    754           Ty = Ty->getPointerElementType();
    755         }
    756         BaseLValue = MakeAddrLValue(
    757             Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes),
    758                     ASELValue.getAlignment()),
    759             ASELValue.getType(), ASELValue.getAlignmentSource());
    760         // Store the address of the original variable associated with the LHS
    761         // implicit variable.
    762         PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
    763           return ASELValue.getAddress();
    764         });
    765         // Emit reduction copy.
    766         bool IsRegistered = PrivateScope.addPrivate(
    767             OrigVD, [this, PrivateVD, BaseLValue, ASELValue,
    768                      OriginalBaseLValue]() -> Address {
    769               // Emit private VarDecl with reduction init.
    770               EmitDecl(*PrivateVD);
    771               auto Addr = GetAddrOfLocalVar(PrivateVD);
    772               auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
    773                                                    ASELValue.getPointer());
    774               auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
    775               Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
    776                   Ptr, OriginalBaseLValue.getPointer()->getType());
    777               return Address(Ptr, OriginalBaseLValue.getAlignment());
    778             });
    779         assert(IsRegistered && "private var already registered as private");
    780         // Silence the warning about unused variable.
    781         (void)IsRegistered;
    782         PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
    783           return GetAddrOfLocalVar(PrivateVD);
    784         });
    785       } else {
    786         auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
    787         // Store the address of the original variable associated with the LHS
    788         // implicit variable.
    789         PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> Address {
    790           DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
    791                           CapturedStmtInfo->lookup(OrigVD) != nullptr,
    792                           IRef->getType(), VK_LValue, IRef->getExprLoc());
    793           return EmitLValue(&DRE).getAddress();
    794         });
    795         // Emit reduction copy.
    796         bool IsRegistered =
    797             PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> Address {
    798               // Emit private VarDecl with reduction init.
    799               EmitDecl(*PrivateVD);
    800               return GetAddrOfLocalVar(PrivateVD);
    801             });
    802         assert(IsRegistered && "private var already registered as private");
    803         // Silence the warning about unused variable.
    804         (void)IsRegistered;
    805         PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
    806           return GetAddrOfLocalVar(PrivateVD);
    807         });
    808       }
    809       ++ILHS, ++IRHS, ++IPriv;
    810     }
    811   }
    812 }
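
         // Sketch of the three cases handled above (hypothetical user code):
         //
         //   #pragma omp parallel reduction(+: a[1:n])   // array section
         //   #pragma omp parallel reduction(+: a[3])     // array element
         //   #pragma omp parallel reduction(+: s)        // whole variable
         //
         // For the section, a private array of 'n' elements is allocated and
         // initialized with the reduction init expression; for the element and the
         // plain variable, a single private copy with reduction init is emitted.
         // In every case the LHS/RHS pseudo variables used by the reduction
         // operation are remapped to the original and private storage, respectively.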
    813 
    814 void CodeGenFunction::EmitOMPReductionClauseFinal(
    815     const OMPExecutableDirective &D) {
    816   if (!HaveInsertPoint())
    817     return;
    818   llvm::SmallVector<const Expr *, 8> Privates;
    819   llvm::SmallVector<const Expr *, 8> LHSExprs;
    820   llvm::SmallVector<const Expr *, 8> RHSExprs;
    821   llvm::SmallVector<const Expr *, 8> ReductionOps;
    822   bool HasAtLeastOneReduction = false;
    823   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    824     HasAtLeastOneReduction = true;
    825     Privates.append(C->privates().begin(), C->privates().end());
    826     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    827     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    828     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    829   }
    830   if (HasAtLeastOneReduction) {
     831     // Emit a nowait reduction if the nowait clause is present or the directive
     832     // is a parallel directive (it always has an implicit barrier).
    833     CGM.getOpenMPRuntime().emitReduction(
    834         *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
    835         D.getSingleClause<OMPNowaitClause>() ||
    836             isOpenMPParallelDirective(D.getDirectiveKind()) ||
    837             D.getDirectiveKind() == OMPD_simd,
    838         D.getDirectiveKind() == OMPD_simd);
    839   }
    840 }
    841 
    842 static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
    843                                            const OMPExecutableDirective &S,
    844                                            OpenMPDirectiveKind InnermostKind,
    845                                            const RegionCodeGenTy &CodeGen) {
    846   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
    847   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
    848   CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
    849   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
    850       S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
    851   if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    852     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    853     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
    854                                          /*IgnoreResultAssign*/ true);
    855     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
    856         CGF, NumThreads, NumThreadsClause->getLocStart());
    857   }
    858   if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    859     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    860     CGF.CGM.getOpenMPRuntime().emitProcBindClause(
    861         CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
    862   }
    863   const Expr *IfCond = nullptr;
    864   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    865     if (C->getNameModifier() == OMPD_unknown ||
    866         C->getNameModifier() == OMPD_parallel) {
    867       IfCond = C->getCondition();
    868       break;
    869     }
    870   }
    871   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
    872                                               CapturedVars, IfCond);
    873 }
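
         // As an illustration, a combined set of clauses such as
         //
         //   #pragma omp parallel if(parallel: cond) num_threads(4) proc_bind(close)
         //
         // is handled here: the outlined function is created first, then the
         // num_threads and proc_bind values are passed to the runtime, and the 'if'
         // condition (only with no modifier or the 'parallel' modifier) is forwarded
         // to the parallel call so the region can run serially when it is false.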
    874 
    875 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
    876   LexicalScope Scope(*this, S.getSourceRange());
    877   // Emit parallel region as a standalone region.
    878   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    879     OMPPrivateScope PrivateScope(CGF);
    880     bool Copyins = CGF.EmitOMPCopyinClause(S);
    881     bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    882     if (Copyins || Firstprivates) {
     883       // Emit an implicit barrier to synchronize threads and avoid data races
     884       // on the initialization of firstprivate variables or on the propagation
     885       // of the master thread's values of threadprivate variables to the local
     886       // instances of those variables in all other implicit threads.
    887       CGF.CGM.getOpenMPRuntime().emitBarrierCall(
    888           CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
    889           /*ForceSimpleCall=*/true);
    890     }
    891     CGF.EmitOMPPrivateClause(S, PrivateScope);
    892     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    893     (void)PrivateScope.Privatize();
    894     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    895     CGF.EmitOMPReductionClauseFinal(S);
    896   };
    897   emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
    898 }
    899 
    900 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
    901                                       JumpDest LoopExit) {
    902   RunCleanupsScope BodyScope(*this);
     903   // Update counter values for the current iteration.
    904   for (auto I : D.updates()) {
    905     EmitIgnoredExpr(I);
    906   }
    907   // Update the linear variables.
    908   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    909     for (auto U : C->updates()) {
    910       EmitIgnoredExpr(U);
    911     }
    912   }
    913 
    914   // On a continue in the body, jump to the end.
    915   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
    916   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
    917   // Emit loop body.
    918   EmitStmt(D.getBody());
    919   // The end (updates/cleanups).
    920   EmitBlock(Continue.getBlock());
    921   BreakContinueStack.pop_back();
     922   // TODO: Update lastprivates if the SeparateIter flag is true.
     923   // This will be implemented in a follow-up OMPLastprivateClause patch, but
     924   // the result should still be correct without it, as we do not make these
     925   // variables private yet.
    926 }
    927 
    928 void CodeGenFunction::EmitOMPInnerLoop(
    929     const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    930     const Expr *IncExpr,
    931     const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    932     const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
    933   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
    934 
    935   // Start the loop with a block that tests the condition.
    936   auto CondBlock = createBasicBlock("omp.inner.for.cond");
    937   EmitBlock(CondBlock);
    938   LoopStack.push(CondBlock);
    939 
    940   // If there are any cleanups between here and the loop-exit scope,
    941   // create a block to stage a loop exit along.
    942   auto ExitBlock = LoopExit.getBlock();
    943   if (RequiresCleanup)
    944     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
    945 
    946   auto LoopBody = createBasicBlock("omp.inner.for.body");
    947 
    948   // Emit condition.
    949   EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
    950   if (ExitBlock != LoopExit.getBlock()) {
    951     EmitBlock(ExitBlock);
    952     EmitBranchThroughCleanup(LoopExit);
    953   }
    954 
    955   EmitBlock(LoopBody);
    956   incrementProfileCounter(&S);
    957 
    958   // Create a block for the increment.
    959   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
    960   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
    961 
    962   BodyGen(*this);
    963 
    964   // Emit "IV = IV + 1" and a back-edge to the condition block.
    965   EmitBlock(Continue.getBlock());
    966   EmitIgnoredExpr(IncExpr);
    967   PostIncGen(*this);
    968   BreakContinueStack.pop_back();
    969   EmitBranch(CondBlock);
    970   LoopStack.pop();
    971   // Emit the fall-through block.
    972   EmitBlock(LoopExit.getBlock());
    973 }
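
         // Schematic shape of the emitted inner loop (block names as created
         // above):
         //
         //   omp.inner.for.cond:  br LoopCond, omp.inner.for.body, <exit>
         //   omp.inner.for.body:  BodyGen(...)
         //   omp.inner.for.inc:   IV = IV + 1; PostIncGen(...);
         //                        br omp.inner.for.cond
         //   omp.inner.for.end:   fall-through
         //
         // where <exit> is either omp.inner.for.end directly or an intermediate
         // omp.inner.for.cond.cleanup block when cleanups must run on exit.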
    974 
    975 void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
    976   if (!HaveInsertPoint())
    977     return;
    978   // Emit inits for the linear variables.
    979   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    980     for (auto Init : C->inits()) {
    981       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
    982       auto *OrigVD = cast<VarDecl>(
    983           cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())->getDecl());
    984       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
    985                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
    986                       VD->getInit()->getType(), VK_LValue,
    987                       VD->getInit()->getExprLoc());
    988       AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
    989       EmitExprAsInit(&DRE, VD,
    990                MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
    991                      /*capturedByInit=*/false);
    992       EmitAutoVarCleanups(Emission);
    993     }
    994     // Emit the linear steps for the linear clauses.
    995     // If a step is not constant, it is pre-calculated before the loop.
    996     if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
    997       if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
    998         EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
    999         // Emit calculation of the linear step.
   1000         EmitIgnoredExpr(CS);
   1001       }
   1002   }
   1003 }
   1004 
   1005 static void emitLinearClauseFinal(CodeGenFunction &CGF,
   1006                                   const OMPLoopDirective &D) {
   1007   if (!CGF.HaveInsertPoint())
   1008     return;
   1009   // Emit the final values of the linear variables.
   1010   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
   1011     auto IC = C->varlist_begin();
   1012     for (auto F : C->finals()) {
   1013       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
   1014       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
   1015                       CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
   1016                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
   1017       Address OrigAddr = CGF.EmitLValue(&DRE).getAddress();
   1018       CodeGenFunction::OMPPrivateScope VarScope(CGF);
   1019       VarScope.addPrivate(OrigVD,
   1020                           [OrigAddr]() -> Address { return OrigAddr; });
   1021       (void)VarScope.Privatize();
   1022       CGF.EmitIgnoredExpr(F);
   1023       ++IC;
   1024     }
   1025   }
   1026 }
   1027 
   1028 static void emitAlignedClause(CodeGenFunction &CGF,
   1029                               const OMPExecutableDirective &D) {
   1030   if (!CGF.HaveInsertPoint())
   1031     return;
   1032   for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
   1033     unsigned ClauseAlignment = 0;
   1034     if (auto AlignmentExpr = Clause->getAlignment()) {
   1035       auto AlignmentCI =
   1036           cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
   1037       ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
   1038     }
   1039     for (auto E : Clause->varlists()) {
   1040       unsigned Alignment = ClauseAlignment;
   1041       if (Alignment == 0) {
   1042         // OpenMP [2.8.1, Description]
   1043         // If no optional parameter is specified, implementation-defined default
   1044         // alignments for SIMD instructions on the target platforms are assumed.
   1045         Alignment =
   1046             CGF.getContext()
   1047                 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
   1048                     E->getType()->getPointeeType()))
   1049                 .getQuantity();
   1050       }
   1051       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
   1052              "alignment is not power of 2");
   1053       if (Alignment != 0) {
   1054         llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
   1055         CGF.EmitAlignmentAssumption(PtrValue, Alignment);
   1056       }
   1057     }
   1058   }
   1059 }
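
         // Hypothetical example:
         //
         //   #pragma omp simd aligned(p : 64)
         //   for (int i = 0; i < n; ++i) p[i] += 1;
         //
         // emits an alignment assumption of 64 bytes for 'p'; with no explicit
         // alignment, the target's default SIMD alignment for the pointee type is
         // assumed instead.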
   1060 
   1061 static void emitPrivateLoopCounters(CodeGenFunction &CGF,
   1062                                     CodeGenFunction::OMPPrivateScope &LoopScope,
   1063                                     ArrayRef<Expr *> Counters,
   1064                                     ArrayRef<Expr *> PrivateCounters) {
   1065   if (!CGF.HaveInsertPoint())
   1066     return;
   1067   auto I = PrivateCounters.begin();
   1068   for (auto *E : Counters) {
   1069     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
   1070     auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
   1071     Address Addr = Address::invalid();
   1072     (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
   1073       // Emit var without initialization.
   1074       auto VarEmission = CGF.EmitAutoVarAlloca(*PrivateVD);
   1075       CGF.EmitAutoVarCleanups(VarEmission);
   1076       Addr = VarEmission.getAllocatedAddress();
   1077       return Addr;
   1078     });
   1079     (void)LoopScope.addPrivate(VD, [&]() -> Address { return Addr; });
   1080     ++I;
   1081   }
   1082 }
   1083 
   1084 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
   1085                         const Expr *Cond, llvm::BasicBlock *TrueBlock,
   1086                         llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
   1087   if (!CGF.HaveInsertPoint())
   1088     return;
   1089   {
   1090     CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
   1091     emitPrivateLoopCounters(CGF, PreCondScope, S.counters(),
   1092                             S.private_counters());
   1093     (void)PreCondScope.Privatize();
   1094     // Get initial values of real counters.
   1095     for (auto I : S.inits()) {
   1096       CGF.EmitIgnoredExpr(I);
   1097     }
   1098   }
    1099   // Check that the loop is executed at least once.
   1100   CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
   1101 }
   1102 
   1103 static void
   1104 emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
   1105                       CodeGenFunction::OMPPrivateScope &PrivateScope) {
   1106   if (!CGF.HaveInsertPoint())
   1107     return;
   1108   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
   1109     auto CurPrivate = C->privates().begin();
   1110     for (auto *E : C->varlists()) {
   1111       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
   1112       auto *PrivateVD =
   1113           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
   1114       bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
   1115         // Emit private VarDecl with copy init.
   1116         CGF.EmitVarDecl(*PrivateVD);
   1117         return CGF.GetAddrOfLocalVar(PrivateVD);
   1118       });
   1119       assert(IsRegistered && "linear var already registered as private");
   1120       // Silence the warning about unused variable.
   1121       (void)IsRegistered;
   1122       ++CurPrivate;
   1123     }
   1124   }
   1125 }
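// For illustration: with '#pragma omp simd linear(x : Step)' a private copy of
// 'x' is created and copy-initialized from the original variable, and uses of
// 'x' inside the loop body are redirected to that copy for the duration of the
// privatization scope.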
   1126 
   1127 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
   1128                                      const OMPExecutableDirective &D) {
   1129   if (!CGF.HaveInsertPoint())
   1130     return;
   1131   if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
   1132     RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
   1133                                  /*ignoreResult=*/true);
   1134     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
   1135     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    1136     // In the presence of a finite 'safelen', it may be unsafe to mark all
    1137     // the memory instructions parallel, because loop-carried
    1138     // dependences of 'safelen' iterations are possible.
   1139     CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
   1140   } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
   1141     RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
   1142                                  /*ignoreResult=*/true);
   1143     llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
   1144     CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    1145     // In the presence of a finite 'safelen', it may be unsafe to mark all
    1146     // the memory instructions parallel, because loop-carried
    1147     // dependences of 'safelen' iterations are possible.
   1148     CGF.LoopStack.setParallel(false);
   1149   }
   1150 }
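// For illustration: '#pragma omp simd simdlen(8)' requests a vectorization
// width of 8 and, if no 'safelen' clause is present, also marks the loop's
// memory accesses as parallel; with 'safelen(8)' the width is still 8, but the
// accesses are not marked parallel because loop-carried dependences within the
// 'safelen' distance are still possible, as the comments above note.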
   1151 
   1152 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
    1153   // Walk clauses and process safelen/simdlen.
   1154   LoopStack.setParallel();
   1155   LoopStack.setVectorizeEnable(true);
   1156   emitSimdlenSafelenClause(*this, D);
   1157 }
   1158 
   1159 void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) {
   1160   if (!HaveInsertPoint())
   1161     return;
   1162   auto IC = D.counters().begin();
   1163   for (auto F : D.finals()) {
   1164     auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
   1165     if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) {
   1166       DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
   1167                       CapturedStmtInfo->lookup(OrigVD) != nullptr,
   1168                       (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
   1169       Address OrigAddr = EmitLValue(&DRE).getAddress();
   1170       OMPPrivateScope VarScope(*this);
   1171       VarScope.addPrivate(OrigVD,
   1172                           [OrigAddr]() -> Address { return OrigAddr; });
   1173       (void)VarScope.Privatize();
   1174       EmitIgnoredExpr(F);
   1175     }
   1176     ++IC;
   1177   }
   1178   emitLinearClauseFinal(*this, D);
   1179 }
   1180 
   1181 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
   1182   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
   1183     // if (PreCond) {
   1184     //   for (IV in 0..LastIteration) BODY;
   1185     //   <Final counter/linear vars updates>;
   1186     // }
   1187     //
   1188 
   1189     // Emit: if (PreCond) - begin.
   1190     // If the condition constant folds and can be elided, avoid emitting the
   1191     // whole loop.
   1192     bool CondConstant;
   1193     llvm::BasicBlock *ContBlock = nullptr;
   1194     if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
   1195       if (!CondConstant)
   1196         return;
   1197     } else {
   1198       auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
   1199       ContBlock = CGF.createBasicBlock("simd.if.end");
   1200       emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
   1201                   CGF.getProfileCount(&S));
   1202       CGF.EmitBlock(ThenBlock);
   1203       CGF.incrementProfileCounter(&S);
   1204     }
   1205 
   1206     // Emit the loop iteration variable.
   1207     const Expr *IVExpr = S.getIterationVariable();
   1208     const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
   1209     CGF.EmitVarDecl(*IVDecl);
   1210     CGF.EmitIgnoredExpr(S.getInit());
   1211 
    1212     // Emit the iteration count variable.
    1213     // If it is not a variable, Sema decided to calculate the iteration count on
    1214     // each iteration (e.g., it is foldable into a constant).
    1215     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    1216       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    1217       // Emit calculation of the iteration count.
   1218       CGF.EmitIgnoredExpr(S.getCalcLastIteration());
   1219     }
   1220 
   1221     CGF.EmitOMPSimdInit(S);
   1222 
   1223     emitAlignedClause(CGF, S);
   1224     CGF.EmitOMPLinearClauseInit(S);
   1225     bool HasLastprivateClause;
   1226     {
   1227       OMPPrivateScope LoopScope(CGF);
   1228       emitPrivateLoopCounters(CGF, LoopScope, S.counters(),
   1229                               S.private_counters());
   1230       emitPrivateLinearVars(CGF, S, LoopScope);
   1231       CGF.EmitOMPPrivateClause(S, LoopScope);
   1232       CGF.EmitOMPReductionClauseInit(S, LoopScope);
   1233       HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
   1234       (void)LoopScope.Privatize();
   1235       CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
   1236                            S.getInc(),
   1237                            [&S](CodeGenFunction &CGF) {
   1238                              CGF.EmitOMPLoopBody(S, JumpDest());
   1239                              CGF.EmitStopPoint(&S);
   1240                            },
   1241                            [](CodeGenFunction &) {});
   1242       // Emit final copy of the lastprivate variables at the end of loops.
   1243       if (HasLastprivateClause) {
   1244         CGF.EmitOMPLastprivateClauseFinal(S);
   1245       }
   1246       CGF.EmitOMPReductionClauseFinal(S);
   1247     }
   1248     CGF.EmitOMPSimdFinal(S);
   1249     // Emit: if (PreCond) - end.
   1250     if (ContBlock) {
   1251       CGF.EmitBranch(ContBlock);
   1252       CGF.EmitBlock(ContBlock, true);
   1253     }
   1254   };
   1255   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
   1256 }
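// For illustration: for
//   #pragma omp simd
//   for (int i = 0; i < n; ++i) a[i] = b[i] + c[i];
// the lambda above roughly emits the precondition check, the iteration
// variable and iteration count, the privatized counter/linear/private/
// reduction variables, the inner loop with simd loop metadata, and finally the
// counter and linear-variable updates, all inlined into the current function.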
   1257 
   1258 void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
   1259                                           const OMPLoopDirective &S,
   1260                                           OMPPrivateScope &LoopScope,
   1261                                           bool Ordered, Address LB,
   1262                                           Address UB, Address ST,
   1263                                           Address IL, llvm::Value *Chunk) {
   1264   auto &RT = CGM.getOpenMPRuntime();
   1265 
   1266   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
   1267   const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind);
   1268 
   1269   assert((Ordered ||
   1270           !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) &&
   1271          "static non-chunked schedule does not need outer loop");
   1272 
   1273   // Emit outer loop.
   1274   //
   1275   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
   1276   // When schedule(dynamic,chunk_size) is specified, the iterations are
   1277   // distributed to threads in the team in chunks as the threads request them.
   1278   // Each thread executes a chunk of iterations, then requests another chunk,
   1279   // until no chunks remain to be distributed. Each chunk contains chunk_size
   1280   // iterations, except for the last chunk to be distributed, which may have
   1281   // fewer iterations. When no chunk_size is specified, it defaults to 1.
   1282   //
   1283   // When schedule(guided,chunk_size) is specified, the iterations are assigned
   1284   // to threads in the team in chunks as the executing threads request them.
   1285   // Each thread executes a chunk of iterations, then requests another chunk,
   1286   // until no chunks remain to be assigned. For a chunk_size of 1, the size of
   1287   // each chunk is proportional to the number of unassigned iterations divided
   1288   // by the number of threads in the team, decreasing to 1. For a chunk_size
   1289   // with value k (greater than 1), the size of each chunk is determined in the
   1290   // same way, with the restriction that the chunks do not contain fewer than k
   1291   // iterations (except for the last chunk to be assigned, which may have fewer
   1292   // than k iterations).
   1293   //
   1294   // When schedule(auto) is specified, the decision regarding scheduling is
   1295   // delegated to the compiler and/or runtime system. The programmer gives the
   1296   // implementation the freedom to choose any possible mapping of iterations to
   1297   // threads in the team.
   1298   //
   1299   // When schedule(runtime) is specified, the decision regarding scheduling is
   1300   // deferred until run time, and the schedule and chunk size are taken from the
   1301   // run-sched-var ICV. If the ICV is set to auto, the schedule is
    1302   // implementation defined.
   1303   //
   1304   // while(__kmpc_dispatch_next(&LB, &UB)) {
   1305   //   idx = LB;
   1306   //   while (idx <= UB) { BODY; ++idx;
   1307   //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
   1308   //   } // inner loop
   1309   // }
   1310   //
   1311   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
   1312   // When schedule(static, chunk_size) is specified, iterations are divided into
   1313   // chunks of size chunk_size, and the chunks are assigned to the threads in
   1314   // the team in a round-robin fashion in the order of the thread number.
   1315   //
   1316   // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
   1317   //   while (idx <= UB) { BODY; ++idx; } // inner loop
   1318   //   LB = LB + ST;
   1319   //   UB = UB + ST;
   1320   // }
   1321   //
   1322 
   1323   const Expr *IVExpr = S.getIterationVariable();
   1324   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
   1325   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
   1326 
   1327   if (DynamicOrOrdered) {
   1328     llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
   1329     RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind,
   1330                            IVSize, IVSigned, Ordered, UBVal, Chunk);
   1331   } else {
   1332     RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
   1333                          IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk);
   1334   }
   1335 
   1336   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
   1337 
   1338   // Start the loop with a block that tests the condition.
   1339   auto CondBlock = createBasicBlock("omp.dispatch.cond");
   1340   EmitBlock(CondBlock);
   1341   LoopStack.push(CondBlock);
   1342 
   1343   llvm::Value *BoolCondVal = nullptr;
   1344   if (!DynamicOrOrdered) {
   1345     // UB = min(UB, GlobalUB)
   1346     EmitIgnoredExpr(S.getEnsureUpperBound());
   1347     // IV = LB
   1348     EmitIgnoredExpr(S.getInit());
   1349     // IV < UB
   1350     BoolCondVal = EvaluateExprAsBool(S.getCond());
   1351   } else {
   1352     BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
   1353                                     IL, LB, UB, ST);
   1354   }
   1355 
    1356   // If there are any cleanups between here and the loop-exit scope,
    1357   // create a block to stage the loop exit along the way.
   1358   auto ExitBlock = LoopExit.getBlock();
   1359   if (LoopScope.requiresCleanups())
   1360     ExitBlock = createBasicBlock("omp.dispatch.cleanup");
   1361 
   1362   auto LoopBody = createBasicBlock("omp.dispatch.body");
   1363   Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
   1364   if (ExitBlock != LoopExit.getBlock()) {
   1365     EmitBlock(ExitBlock);
   1366     EmitBranchThroughCleanup(LoopExit);
   1367   }
   1368   EmitBlock(LoopBody);
   1369 
   1370   // Emit "IV = LB" (in case of static schedule, we have already calculated new
   1371   // LB for loop condition and emitted it above).
   1372   if (DynamicOrOrdered)
   1373     EmitIgnoredExpr(S.getInit());
   1374 
   1375   // Create a block for the increment.
   1376   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
   1377   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
   1378 
    1379   // Generate !llvm.loop.parallel metadata for loads and stores for loops
    1380   // with dynamic/guided scheduling and without an ordered clause.
   1381   if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
   1382     LoopStack.setParallel((ScheduleKind == OMPC_SCHEDULE_dynamic ||
   1383                            ScheduleKind == OMPC_SCHEDULE_guided) &&
   1384                           !Ordered);
   1385   } else {
   1386     EmitOMPSimdInit(S);
   1387   }
   1388 
   1389   SourceLocation Loc = S.getLocStart();
   1390   EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
   1391                    [&S, LoopExit](CodeGenFunction &CGF) {
   1392                      CGF.EmitOMPLoopBody(S, LoopExit);
   1393                      CGF.EmitStopPoint(&S);
   1394                    },
   1395                    [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
   1396                      if (Ordered) {
   1397                        CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
   1398                            CGF, Loc, IVSize, IVSigned);
   1399                      }
   1400                    });
   1401 
   1402   EmitBlock(Continue.getBlock());
   1403   BreakContinueStack.pop_back();
   1404   if (!DynamicOrOrdered) {
   1405     // Emit "LB = LB + Stride", "UB = UB + Stride".
   1406     EmitIgnoredExpr(S.getNextLowerBound());
   1407     EmitIgnoredExpr(S.getNextUpperBound());
   1408   }
   1409 
   1410   EmitBranch(CondBlock);
   1411   LoopStack.pop();
   1412   // Emit the fall-through block.
   1413   EmitBlock(LoopExit.getBlock());
   1414 
   1415   // Tell the runtime we are done.
   1416   if (!DynamicOrOrdered)
   1417     RT.emitForStaticFinish(*this, S.getLocEnd());
   1418 }
   1419 
    1420 /// \brief Emit a helper variable and return the corresponding lvalue.
   1421 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
   1422                                const DeclRefExpr *Helper) {
   1423   auto VDecl = cast<VarDecl>(Helper->getDecl());
   1424   CGF.EmitVarDecl(*VDecl);
   1425   return CGF.EmitLValue(Helper);
   1426 }
   1427 
   1428 static std::pair<llvm::Value * /*Chunk*/, OpenMPScheduleClauseKind>
   1429 emitScheduleClause(CodeGenFunction &CGF, const OMPLoopDirective &S,
   1430                    bool OuterRegion) {
   1431   // Detect the loop schedule kind and chunk.
   1432   auto ScheduleKind = OMPC_SCHEDULE_unknown;
   1433   llvm::Value *Chunk = nullptr;
   1434   if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
   1435     ScheduleKind = C->getScheduleKind();
   1436     if (const auto *Ch = C->getChunkSize()) {
   1437       if (auto *ImpRef = cast_or_null<DeclRefExpr>(C->getHelperChunkSize())) {
   1438         if (OuterRegion) {
   1439           const VarDecl *ImpVar = cast<VarDecl>(ImpRef->getDecl());
   1440           CGF.EmitVarDecl(*ImpVar);
   1441           CGF.EmitStoreThroughLValue(
   1442               CGF.EmitAnyExpr(Ch),
   1443               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(ImpVar),
   1444                                  ImpVar->getType()));
   1445         } else {
   1446           Ch = ImpRef;
   1447         }
   1448       }
   1449       if (!C->getHelperChunkSize() || !OuterRegion) {
   1450         Chunk = CGF.EmitScalarExpr(Ch);
   1451         Chunk = CGF.EmitScalarConversion(Chunk, Ch->getType(),
   1452                                          S.getIterationVariable()->getType(),
   1453                                          S.getLocStart());
   1454       }
   1455     }
   1456   }
   1457   return std::make_pair(Chunk, ScheduleKind);
   1458 }
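// For illustration: for '#pragma omp for schedule(dynamic, 4)' this returns
// the emitted chunk value (4, converted to the iteration variable's type)
// paired with OMPC_SCHEDULE_dynamic, and (nullptr, OMPC_SCHEDULE_unknown) when
// no schedule clause is present. With OuterRegion=true, as used by the
// combined 'parallel for' directives, a chunk that has a helper variable is
// stored into that helper so the inner region can reload it later.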
   1459 
   1460 bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
   1461   // Emit the loop iteration variable.
   1462   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
   1463   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
   1464   EmitVarDecl(*IVDecl);
   1465 
    1466   // Emit the iteration count variable.
    1467   // If it is not a variable, Sema decided to calculate the iteration count on
    1468   // each iteration (e.g., it is foldable into a constant).
    1469   if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    1470     EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    1471     // Emit calculation of the iteration count.
   1472     EmitIgnoredExpr(S.getCalcLastIteration());
   1473   }
   1474 
   1475   auto &RT = CGM.getOpenMPRuntime();
   1476 
   1477   bool HasLastprivateClause;
   1478   // Check pre-condition.
   1479   {
   1480     // Skip the entire loop if we don't meet the precondition.
   1481     // If the condition constant folds and can be elided, avoid emitting the
   1482     // whole loop.
   1483     bool CondConstant;
   1484     llvm::BasicBlock *ContBlock = nullptr;
   1485     if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
   1486       if (!CondConstant)
   1487         return false;
   1488     } else {
   1489       auto *ThenBlock = createBasicBlock("omp.precond.then");
   1490       ContBlock = createBasicBlock("omp.precond.end");
   1491       emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
   1492                   getProfileCount(&S));
   1493       EmitBlock(ThenBlock);
   1494       incrementProfileCounter(&S);
   1495     }
   1496 
   1497     emitAlignedClause(*this, S);
   1498     EmitOMPLinearClauseInit(S);
   1499     // Emit 'then' code.
   1500     {
   1501       // Emit helper vars inits.
   1502       LValue LB =
   1503           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
   1504       LValue UB =
   1505           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
   1506       LValue ST =
   1507           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
   1508       LValue IL =
   1509           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
   1510 
   1511       OMPPrivateScope LoopScope(*this);
   1512       if (EmitOMPFirstprivateClause(S, LoopScope)) {
   1513         // Emit implicit barrier to synchronize threads and avoid data races on
   1514         // initialization of firstprivate variables.
   1515         CGM.getOpenMPRuntime().emitBarrierCall(
   1516             *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
   1517             /*ForceSimpleCall=*/true);
   1518       }
   1519       EmitOMPPrivateClause(S, LoopScope);
   1520       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
   1521       EmitOMPReductionClauseInit(S, LoopScope);
   1522       emitPrivateLoopCounters(*this, LoopScope, S.counters(),
   1523                               S.private_counters());
   1524       emitPrivateLinearVars(*this, S, LoopScope);
   1525       (void)LoopScope.Privatize();
   1526 
   1527       // Detect the loop schedule kind and chunk.
   1528       llvm::Value *Chunk;
   1529       OpenMPScheduleClauseKind ScheduleKind;
   1530       auto ScheduleInfo =
   1531           emitScheduleClause(*this, S, /*OuterRegion=*/false);
   1532       Chunk = ScheduleInfo.first;
   1533       ScheduleKind = ScheduleInfo.second;
   1534       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
   1535       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
   1536       const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr;
   1537       if (RT.isStaticNonchunked(ScheduleKind,
   1538                                 /* Chunked */ Chunk != nullptr) &&
   1539           !Ordered) {
   1540         if (isOpenMPSimdDirective(S.getDirectiveKind())) {
   1541           EmitOMPSimdInit(S);
   1542         }
   1543         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
   1544         // When no chunk_size is specified, the iteration space is divided into
   1545         // chunks that are approximately equal in size, and at most one chunk is
   1546         // distributed to each thread. Note that the size of the chunks is
   1547         // unspecified in this case.
   1548         RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
   1549                              IVSize, IVSigned, Ordered,
   1550                              IL.getAddress(), LB.getAddress(),
   1551                              UB.getAddress(), ST.getAddress());
   1552         auto LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
   1553         // UB = min(UB, GlobalUB);
   1554         EmitIgnoredExpr(S.getEnsureUpperBound());
   1555         // IV = LB;
   1556         EmitIgnoredExpr(S.getInit());
   1557         // while (idx <= UB) { BODY; ++idx; }
   1558         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
   1559                          S.getInc(),
   1560                          [&S, LoopExit](CodeGenFunction &CGF) {
   1561                            CGF.EmitOMPLoopBody(S, LoopExit);
   1562                            CGF.EmitStopPoint(&S);
   1563                          },
   1564                          [](CodeGenFunction &) {});
   1565         EmitBlock(LoopExit.getBlock());
   1566         // Tell the runtime we are done.
   1567         RT.emitForStaticFinish(*this, S.getLocStart());
   1568       } else {
   1569         // Emit the outer loop, which requests its work chunk [LB..UB] from
   1570         // runtime and runs the inner loop to process it.
   1571         EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, Ordered,
   1572                             LB.getAddress(), UB.getAddress(), ST.getAddress(),
   1573                             IL.getAddress(), Chunk);
   1574       }
   1575       EmitOMPReductionClauseFinal(S);
   1576       // Emit final copy of the lastprivate variables if IsLastIter != 0.
   1577       if (HasLastprivateClause)
   1578         EmitOMPLastprivateClauseFinal(
   1579             S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
   1580     }
   1581     if (isOpenMPSimdDirective(S.getDirectiveKind())) {
   1582       EmitOMPSimdFinal(S);
   1583     }
   1584     // We're now done with the loop, so jump to the continuation block.
   1585     if (ContBlock) {
   1586       EmitBranch(ContBlock);
   1587       EmitBlock(ContBlock, true);
   1588     }
   1589   }
   1590   return HasLastprivateClause;
   1591 }
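// For illustration: a plain '#pragma omp for' with neither a schedule nor an
// ordered clause takes the static non-chunked path above: one emitForStaticInit
// call, UB = min(UB, GlobalUB), IV = LB, a single inner loop over the assigned
// chunk, and a final emitForStaticFinish. Any other schedule kind (or an
// 'ordered' clause) goes through EmitOMPForOuterLoop, which asks the runtime
// for each chunk in turn.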
   1592 
   1593 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
   1594   LexicalScope Scope(*this, S.getSourceRange());
   1595   bool HasLastprivates = false;
   1596   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
   1597     HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
   1598   };
   1599   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
   1600                                               S.hasCancel());
   1601 
   1602   // Emit an implicit barrier at the end.
   1603   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
   1604     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
   1605   }
   1606 }
   1607 
   1608 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
   1609   LexicalScope Scope(*this, S.getSourceRange());
   1610   bool HasLastprivates = false;
   1611   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
   1612     HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
   1613   };
   1614   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
   1615 
   1616   // Emit an implicit barrier at the end.
   1617   if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
   1618     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
   1619   }
   1620 }
   1621 
   1622 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
   1623                                 const Twine &Name,
   1624                                 llvm::Value *Init = nullptr) {
   1625   auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
   1626   if (Init)
   1627     CGF.EmitScalarInit(Init, LVal);
   1628   return LVal;
   1629 }
   1630 
   1631 OpenMPDirectiveKind
   1632 CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
   1633   auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
   1634   auto *CS = dyn_cast<CompoundStmt>(Stmt);
   1635   if (CS && CS->size() > 1) {
   1636     bool HasLastprivates = false;
   1637     auto &&CodeGen = [&S, CS, &HasLastprivates](CodeGenFunction &CGF) {
   1638       auto &C = CGF.CGM.getContext();
   1639       auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
   1640       // Emit helper vars inits.
   1641       LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
   1642                                     CGF.Builder.getInt32(0));
   1643       auto *GlobalUBVal = CGF.Builder.getInt32(CS->size() - 1);
   1644       LValue UB =
   1645           createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
   1646       LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
   1647                                     CGF.Builder.getInt32(1));
   1648       LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
   1649                                     CGF.Builder.getInt32(0));
   1650       // Loop counter.
   1651       LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
   1652       OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
   1653       CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
   1654       OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
   1655       CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
   1656       // Generate condition for loop.
   1657       BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
   1658                           OK_Ordinary, S.getLocStart(),
   1659                           /*fpContractable=*/false);
   1660       // Increment for loop counter.
   1661       UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue,
   1662                         OK_Ordinary, S.getLocStart());
   1663       auto BodyGen = [CS, &S, &IV](CodeGenFunction &CGF) {
   1664         // Iterate through all sections and emit a switch construct:
   1665         // switch (IV) {
   1666         //   case 0:
   1667         //     <SectionStmt[0]>;
   1668         //     break;
   1669         // ...
   1670         //   case <NumSection> - 1:
   1671         //     <SectionStmt[<NumSection> - 1]>;
   1672         //     break;
   1673         // }
   1674         // .omp.sections.exit:
   1675         auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
   1676         auto *SwitchStmt = CGF.Builder.CreateSwitch(
   1677             CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
   1678             CS->size());
   1679         unsigned CaseNumber = 0;
   1680         for (auto *SubStmt : CS->children()) {
   1681           auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
   1682           CGF.EmitBlock(CaseBB);
   1683           SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
   1684           CGF.EmitStmt(SubStmt);
   1685           CGF.EmitBranch(ExitBB);
   1686           ++CaseNumber;
   1687         }
   1688         CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
   1689       };
   1690 
   1691       CodeGenFunction::OMPPrivateScope LoopScope(CGF);
   1692       if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
   1693         // Emit implicit barrier to synchronize threads and avoid data races on
   1694         // initialization of firstprivate variables.
   1695         CGF.CGM.getOpenMPRuntime().emitBarrierCall(
   1696             CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
   1697             /*ForceSimpleCall=*/true);
   1698       }
   1699       CGF.EmitOMPPrivateClause(S, LoopScope);
   1700       HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
   1701       CGF.EmitOMPReductionClauseInit(S, LoopScope);
   1702       (void)LoopScope.Privatize();
   1703 
   1704       // Emit static non-chunked loop.
   1705       CGF.CGM.getOpenMPRuntime().emitForStaticInit(
   1706           CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
   1707           /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
   1708           LB.getAddress(), UB.getAddress(), ST.getAddress());
   1709       // UB = min(UB, GlobalUB);
   1710       auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
   1711       auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
   1712           CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
   1713       CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
   1714       // IV = LB;
   1715       CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
   1716       // while (idx <= UB) { BODY; ++idx; }
   1717       CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
   1718                            [](CodeGenFunction &) {});
   1719       // Tell the runtime we are done.
   1720       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart());
   1721       CGF.EmitOMPReductionClauseFinal(S);
   1722 
   1723       // Emit final copy of the lastprivate variables if IsLastIter != 0.
   1724       if (HasLastprivates)
   1725         CGF.EmitOMPLastprivateClauseFinal(
   1726             S, CGF.Builder.CreateIsNotNull(
   1727                    CGF.EmitLoadOfScalar(IL, S.getLocStart())));
   1728     };
   1729 
   1730     bool HasCancel = false;
   1731     if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
   1732       HasCancel = OSD->hasCancel();
   1733     else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
   1734       HasCancel = OPSD->hasCancel();
   1735     CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
   1736                                                 HasCancel);
    1737     // Emit a barrier for lastprivates only if the 'sections' directive has a
    1738     // 'nowait' clause. Otherwise the barrier will be generated by the codegen
    1739     // for the directive.
    1740     if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    1741       // Emit an implicit barrier to synchronize threads and avoid data races on
    1742       // the final copy of the lastprivate variables.
   1743       CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
   1744                                              OMPD_unknown);
   1745     }
   1746     return OMPD_sections;
   1747   }
    1748   // If only one section is found, there is no need to generate a loop; emit it
    1749   // as a single region.
    1750   bool HasFirstprivates;
    1751   // No need to generate reductions for a sections region with a single section;
    1752   // we can use the original shared variables for all operations.
    1753   bool HasReductions = S.hasClausesOfKind<OMPReductionClause>();
    1754   // No need to generate lastprivates for a sections region with a single
    1755   // section; we can use the original shared variables for all calculations,
    1756   // with a barrier at the end of the sections.
   1757   bool HasLastprivates = S.hasClausesOfKind<OMPLastprivateClause>();
   1758   auto &&CodeGen = [Stmt, &S, &HasFirstprivates](CodeGenFunction &CGF) {
   1759     CodeGenFunction::OMPPrivateScope SingleScope(CGF);
   1760     HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
   1761     CGF.EmitOMPPrivateClause(S, SingleScope);
   1762     (void)SingleScope.Privatize();
   1763 
   1764     CGF.EmitStmt(Stmt);
   1765   };
   1766   CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
   1767                                           llvm::None, llvm::None, llvm::None,
   1768                                           llvm::None);
    1769   // Emit a barrier for firstprivates, lastprivates, or reductions only if the
    1770   // 'sections' directive has a 'nowait' clause. Otherwise the barrier will be
    1771   // generated by the codegen for the directive.
   1772   if ((HasFirstprivates || HasLastprivates || HasReductions) &&
   1773       S.getSingleClause<OMPNowaitClause>()) {
   1774     // Emit implicit barrier to synchronize threads and avoid data races on
   1775     // initialization of firstprivate variables.
   1776     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_unknown,
   1777                                            /*EmitChecks=*/false,
   1778                                            /*ForceSimpleCall=*/true);
   1779   }
   1780   return OMPD_single;
   1781 }
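// For illustration: for a '#pragma omp sections' construct containing two
// '#pragma omp section' blocks, foo(); and bar();, the sections are lowered to
// a statically scheduled loop over IV in [0, 1] whose body is a switch
// dispatching to foo() or bar(); a 'sections' construct with a single section
// is emitted as a 'single' region instead, as the code above shows.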
   1782 
   1783 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
   1784   LexicalScope Scope(*this, S.getSourceRange());
   1785   OpenMPDirectiveKind EmittedAs = EmitSections(S);
   1786   // Emit an implicit barrier at the end.
   1787   if (!S.getSingleClause<OMPNowaitClause>()) {
   1788     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs);
   1789   }
   1790 }
   1791 
   1792 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
   1793   LexicalScope Scope(*this, S.getSourceRange());
   1794   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
   1795     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
   1796   };
   1797   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
   1798                                               S.hasCancel());
   1799 }
   1800 
   1801 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
   1802   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
   1803   llvm::SmallVector<const Expr *, 8> DestExprs;
   1804   llvm::SmallVector<const Expr *, 8> SrcExprs;
   1805   llvm::SmallVector<const Expr *, 8> AssignmentOps;
    1806   // Check if there are any 'copyprivate' clauses associated with this
    1807   // 'single' construct.
   1809   // Build a list of copyprivate variables along with helper expressions
   1810   // (<source>, <destination>, <destination>=<source> expressions)
   1811   for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
   1812     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
   1813     DestExprs.append(C->destination_exprs().begin(),
   1814                      C->destination_exprs().end());
   1815     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
   1816     AssignmentOps.append(C->assignment_ops().begin(),
   1817                          C->assignment_ops().end());
   1818   }
   1819   LexicalScope Scope(*this, S.getSourceRange());
   1820   // Emit code for 'single' region along with 'copyprivate' clauses
   1821   bool HasFirstprivates;
   1822   auto &&CodeGen = [&S, &HasFirstprivates](CodeGenFunction &CGF) {
   1823     CodeGenFunction::OMPPrivateScope SingleScope(CGF);
   1824     HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
   1825     CGF.EmitOMPPrivateClause(S, SingleScope);
   1826     (void)SingleScope.Privatize();
   1827 
   1828     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
   1829   };
   1830   CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
   1831                                           CopyprivateVars, DestExprs, SrcExprs,
   1832                                           AssignmentOps);
    1833   // Emit an implicit barrier at the end (to avoid data races on firstprivate
    1834   // init or if no 'nowait' clause was specified, and no 'copyprivate' clause).
   1835   if ((!S.getSingleClause<OMPNowaitClause>() || HasFirstprivates) &&
   1836       CopyprivateVars.empty()) {
   1837     CGM.getOpenMPRuntime().emitBarrierCall(
   1838         *this, S.getLocStart(),
   1839         S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
   1840   }
   1841 }
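// For illustration: for '#pragma omp single copyprivate(x)' the region is
// emitted through the runtime's single-region helper and 'x', together with
// its source/destination/assignment helper expressions, is passed along so the
// value produced by the executing thread can be broadcast to the other
// threads. Without a 'copyprivate' clause an implicit barrier is emitted
// instead, unless 'nowait' is specified and there are no firstprivates.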
   1842 
   1843 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
   1844   LexicalScope Scope(*this, S.getSourceRange());
   1845   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
   1846     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
   1847   };
   1848   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
   1849 }
   1850 
   1851 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
   1852   LexicalScope Scope(*this, S.getSourceRange());
   1853   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
   1854     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
   1855   };
   1856   Expr *Hint = nullptr;
   1857   if (auto *HintClause = S.getSingleClause<OMPHintClause>())
   1858     Hint = HintClause->getHint();
   1859   CGM.getOpenMPRuntime().emitCriticalRegion(*this,
   1860                                             S.getDirectiveName().getAsString(),
   1861                                             CodeGen, S.getLocStart(), Hint);
   1862 }
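// For illustration: '#pragma omp critical (mylock) hint(omp_lock_hint_speculative)'
// roughly emits the region inside a runtime critical section keyed by the name
// 'mylock', forwarding the hint expression; an unnamed 'critical' uses the
// default lock (an empty directive name).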
   1863 
   1864 void CodeGenFunction::EmitOMPParallelForDirective(
   1865     const OMPParallelForDirective &S) {
   1866   // Emit directive as a combined directive that consists of two implicit
   1867   // directives: 'parallel' with 'for' directive.
   1868   LexicalScope Scope(*this, S.getSourceRange());
   1869   (void)emitScheduleClause(*this, S, /*OuterRegion=*/true);
   1870   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
   1871     CGF.EmitOMPWorksharingLoop(S);
   1872   };
   1873   emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
   1874 }
   1875 
   1876 void CodeGenFunction::EmitOMPParallelForSimdDirective(
   1877     const OMPParallelForSimdDirective &S) {
   1878   // Emit directive as a combined directive that consists of two implicit
   1879   // directives: 'parallel' with 'for' directive.
   1880   LexicalScope Scope(*this, S.getSourceRange());
   1881   (void)emitScheduleClause(*this, S, /*OuterRegion=*/true);
   1882   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
   1883     CGF.EmitOMPWorksharingLoop(S);
   1884   };
   1885   emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
   1886 }
   1887 
   1888 void CodeGenFunction::EmitOMPParallelSectionsDirective(
   1889     const OMPParallelSectionsDirective &S) {
   1890   // Emit directive as a combined directive that consists of two implicit
   1891   // directives: 'parallel' with 'sections' directive.
   1892   LexicalScope Scope(*this, S.getSourceRange());
   1893   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
   1894     (void)CGF.EmitSections(S);
   1895   };
   1896   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
   1897 }
   1898 
   1899 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
   1900   // Emit outlined function for task construct.
   1901   LexicalScope Scope(*this, S.getSourceRange());
   1902   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
   1903   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
   1904   auto *I = CS->getCapturedDecl()->param_begin();
   1905   auto *PartId = std::next(I);
    1906   // The first function argument for tasks is a thread id, the second one is a
    1907   // part id (0 for tied tasks, >=0 for untied tasks).
   1908   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
   1909   // Get list of private variables.
   1910   llvm::SmallVector<const Expr *, 8> PrivateVars;
   1911   llvm::SmallVector<const Expr *, 8> PrivateCopies;
   1912   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
   1913     auto IRef = C->varlist_begin();
   1914     for (auto *IInit : C->private_copies()) {
   1915       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
   1916       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
   1917         PrivateVars.push_back(*IRef);
   1918         PrivateCopies.push_back(IInit);
   1919       }
   1920       ++IRef;
   1921     }
   1922   }
   1923   EmittedAsPrivate.clear();
   1924   // Get list of firstprivate variables.
   1925   llvm::SmallVector<const Expr *, 8> FirstprivateVars;
   1926   llvm::SmallVector<const Expr *, 8> FirstprivateCopies;
   1927   llvm::SmallVector<const Expr *, 8> FirstprivateInits;
   1928   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
   1929     auto IRef = C->varlist_begin();
   1930     auto IElemInitRef = C->inits().begin();
   1931     for (auto *IInit : C->private_copies()) {
   1932       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
   1933       if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
   1934         FirstprivateVars.push_back(*IRef);
   1935         FirstprivateCopies.push_back(IInit);
   1936         FirstprivateInits.push_back(*IElemInitRef);
   1937       }
   1938       ++IRef, ++IElemInitRef;
   1939     }
   1940   }
   1941   // Build list of dependences.
   1942   llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8>
   1943       Dependences;
   1944   for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
   1945     for (auto *IRef : C->varlists()) {
   1946       Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
   1947     }
   1948   }
   1949   auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars](
   1950       CodeGenFunction &CGF) {
   1951     // Set proper addresses for generated private copies.
   1952     auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
   1953     OMPPrivateScope Scope(CGF);
   1954     if (!PrivateVars.empty() || !FirstprivateVars.empty()) {
   1955       auto *CopyFn = CGF.Builder.CreateLoad(
   1956           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
   1957       auto *PrivatesPtr = CGF.Builder.CreateLoad(
   1958           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
   1959       // Map privates.
   1960       llvm::SmallVector<std::pair<const VarDecl *, Address>, 16>
   1961           PrivatePtrs;
   1962       llvm::SmallVector<llvm::Value *, 16> CallArgs;
   1963       CallArgs.push_back(PrivatesPtr);
   1964       for (auto *E : PrivateVars) {
   1965         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
   1966         Address PrivatePtr =
   1967             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
   1968         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
   1969         CallArgs.push_back(PrivatePtr.getPointer());
   1970       }
   1971       for (auto *E : FirstprivateVars) {
   1972         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
   1973         Address PrivatePtr =
   1974             CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
   1975         PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
   1976         CallArgs.push_back(PrivatePtr.getPointer());
   1977       }
   1978       CGF.EmitRuntimeCall(CopyFn, CallArgs);
   1979       for (auto &&Pair : PrivatePtrs) {
   1980         Address Replacement(CGF.Builder.CreateLoad(Pair.second),
   1981                             CGF.getContext().getDeclAlign(Pair.first));
   1982         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
   1983       }
   1984     }
   1985     (void)Scope.Privatize();
   1986     if (*PartId) {
   1987       // TODO: emit code for untied tasks.
   1988     }
   1989     CGF.EmitStmt(CS->getCapturedStmt());
   1990   };
   1991   auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
   1992       S, *I, OMPD_task, CodeGen);
    1993   // Check if we should emit a tied or an untied task.
    1994   bool Tied = !S.getSingleClause<OMPUntiedClause>();
    1995   // Check if the task is final.
   1996   llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
   1997   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
   1998     // If the condition constant folds and can be elided, try to avoid emitting
   1999     // the condition and the dead arm of the if/else.
   2000     auto *Cond = Clause->getCondition();
   2001     bool CondConstant;
   2002     if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
   2003       Final.setInt(CondConstant);
   2004     else
   2005       Final.setPointer(EvaluateExprAsBool(Cond));
   2006   } else {
   2007     // By default the task is not final.
   2008     Final.setInt(/*IntVal=*/false);
   2009   }
   2010   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
   2011   const Expr *IfCond = nullptr;
   2012   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
   2013     if (C->getNameModifier() == OMPD_unknown ||
   2014         C->getNameModifier() == OMPD_task) {
   2015       IfCond = C->getCondition();
   2016       break;
   2017     }
   2018   }
   2019   CGM.getOpenMPRuntime().emitTaskCall(
   2020       *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy,
   2021       CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars,
   2022       FirstprivateCopies, FirstprivateInits, Dependences);
   2023 }
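// For illustration: for
//   #pragma omp task firstprivate(a) if(cond) final(n > 10) depend(in : x)
// the associated statement is outlined into a task entry function, 'a' is
// collected into the firstprivate lists, the dependence on 'x' is recorded,
// and emitTaskCall receives the evaluated 'if' and 'final' conditions together
// with the captured record, so the runtime can allocate and enqueue the task
// (or run it as an undeferred task when the 'if' condition is false).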
   2024 
   2025 void CodeGenFunction::EmitOMPTaskyieldDirective(
   2026     const OMPTaskyieldDirective &S) {
   2027   CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
   2028 }
   2029 
   2030 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
   2031   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
   2032 }
   2033 
   2034 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
   2035   CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
   2036 }
   2037 
   2038 void CodeGenFunction::EmitOMPTaskgroupDirective(
   2039     const OMPTaskgroupDirective &S) {
   2040   LexicalScope Scope(*this, S.getSourceRange());
   2041   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
   2042     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
   2043   };
   2044   CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
   2045 }
   2046 
   2047 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
   2048   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
   2049     if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
   2050       return llvm::makeArrayRef(FlushClause->varlist_begin(),
   2051                                 FlushClause->varlist_end());
   2052     }
   2053     return llvm::None;
   2054   }(), S.getLocStart());
   2055 }
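// For illustration: '#pragma omp flush(a, b)' passes the list {a, b} to the
// runtime's flush emission, while a bare '#pragma omp flush' passes an empty
// list and therefore flushes all thread-visible state.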
   2056 
   2057 void CodeGenFunction::EmitOMPDistributeDirective(
   2058     const OMPDistributeDirective &S) {
   2059   llvm_unreachable("CodeGen for 'omp distribute' is not supported yet.");
   2060 }
   2061 
   2062 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
   2063                                                    const CapturedStmt *S) {
   2064   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
   2065   CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
   2066   CGF.CapturedStmtInfo = &CapStmtInfo;
   2067   auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
   2068   Fn->addFnAttr(llvm::Attribute::NoInline);
   2069   return Fn;
   2070 }
   2071 
   2072 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
   2073   if (!S.getAssociatedStmt())
   2074     return;
   2075   LexicalScope Scope(*this, S.getSourceRange());
   2076   auto *C = S.getSingleClause<OMPSIMDClause>();
   2077   auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) {
   2078     if (C) {
   2079       auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
   2080       llvm::SmallVector<llvm::Value *, 16> CapturedVars;
   2081       CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
   2082       auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
   2083       CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
   2084     } else {
   2085       CGF.EmitStmt(
   2086           cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
   2087     }
   2088   };
   2089   CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
   2090 }
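// For illustration: '#pragma omp ordered simd' (the branch where the 'simd'
// clause is present) outlines the associated statement into a separate
// noinline function and calls it directly, whereas a plain '#pragma omp
// ordered' emits the statement inside a runtime ordered region.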
   2091 
   2092 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
   2093                                          QualType SrcType, QualType DestType,
   2094                                          SourceLocation Loc) {
   2095   assert(CGF.hasScalarEvaluationKind(DestType) &&
   2096          "DestType must have scalar evaluation kind.");
   2097   assert(!Val.isAggregate() && "Must be a scalar or complex.");
   2098   return Val.isScalar()
   2099              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
   2100                                         Loc)
   2101              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
   2102                                                  DestType, Loc);
   2103 }
   2104 
   2105 static CodeGenFunction::ComplexPairTy
   2106 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
   2107                       QualType DestType, SourceLocation Loc) {
   2108   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
   2109          "DestType must have complex evaluation kind.");
   2110   CodeGenFunction::ComplexPairTy ComplexVal;
   2111   if (Val.isScalar()) {
   2112     // Convert the input element to the element type of the complex.
   2113     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
   2114     auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
   2115                                               DestElementType, Loc);
   2116     ComplexVal = CodeGenFunction::ComplexPairTy(
   2117         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
   2118   } else {
   2119     assert(Val.isComplex() && "Must be a scalar or complex.");
   2120     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
   2121     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
   2122     ComplexVal.first = CGF.EmitScalarConversion(
   2123         Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
   2124     ComplexVal.second = CGF.EmitScalarConversion(
   2125         Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
   2126   }
   2127   return ComplexVal;
   2128 }
   2129 
   2130 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
   2131                                   LValue LVal, RValue RVal) {
   2132   if (LVal.isGlobalReg()) {
   2133     CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
   2134   } else {
   2135     CGF.EmitAtomicStore(RVal, LVal, IsSeqCst ? llvm::SequentiallyConsistent
   2136                                              : llvm::Monotonic,
   2137                         LVal.isVolatile(), /*IsInit=*/false);
   2138   }
   2139 }
   2140 
   2141 static void emitSimpleStore(CodeGenFunction &CGF, LValue LVal, RValue RVal,
   2142                             QualType RValTy, SourceLocation Loc) {
   2143   switch (CGF.getEvaluationKind(LVal.getType())) {
   2144   case TEK_Scalar:
   2145     CGF.EmitStoreThroughLValue(RValue::get(convertToScalarValue(
   2146                                    CGF, RVal, RValTy, LVal.getType(), Loc)),
   2147                                LVal);
   2148     break;
   2149   case TEK_Complex:
   2150     CGF.EmitStoreOfComplex(
   2151         convertToComplexValue(CGF, RVal, RValTy, LVal.getType(), Loc), LVal,
   2152         /*isInit=*/false);
   2153     break;
   2154   case TEK_Aggregate:
   2155     llvm_unreachable("Must be a scalar or complex.");
   2156   }
   2157 }
   2158 
   2159 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
   2160                                   const Expr *X, const Expr *V,
   2161                                   SourceLocation Loc) {
   2162   // v = x;
   2163   assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
   2164   assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
   2165   LValue XLValue = CGF.EmitLValue(X);
   2166   LValue VLValue = CGF.EmitLValue(V);
   2167   RValue Res = XLValue.isGlobalReg()
   2168                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
   2169                    : CGF.EmitAtomicLoad(XLValue, Loc,
   2170                                         IsSeqCst ? llvm::SequentiallyConsistent
   2171                                                  : llvm::Monotonic,
   2172                                         XLValue.isVolatile());
   2173   // OpenMP, 2.12.6, atomic Construct
   2174   // Any atomic construct with a seq_cst clause forces the atomically
   2175   // performed operation to include an implicit flush operation without a
   2176   // list.
   2177   if (IsSeqCst)
   2178     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
   2179   emitSimpleStore(CGF, VLValue, Res, X->getType().getNonReferenceType(), Loc);
   2180 }
   2181 
   2182 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
   2183                                    const Expr *X, const Expr *E,
   2184                                    SourceLocation Loc) {
   2185   // x = expr;
   2186   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
   2187   emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
   2188   // OpenMP, 2.12.6, atomic Construct
   2189   // Any atomic construct with a seq_cst clause forces the atomically
   2190   // performed operation to include an implicit flush operation without a
   2191   // list.
   2192   if (IsSeqCst)
   2193     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
   2194 }
   2195 
   2196 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
   2197                                                 RValue Update,
   2198                                                 BinaryOperatorKind BO,
   2199                                                 llvm::AtomicOrdering AO,
   2200                                                 bool IsXLHSInRHSPart) {
   2201   auto &Context = CGF.CGM.getContext();
    2202   // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue for
    2203   // the 'x' expression is simple, and atomics are supported for the given type
    2204   // on the target platform.
   2205   if (BO == BO_Comma || !Update.isScalar() ||
   2206       !Update.getScalarVal()->getType()->isIntegerTy() ||
   2207       !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
   2208                         (Update.getScalarVal()->getType() !=
   2209                          X.getAddress().getElementType())) ||
   2210       !X.getAddress().getElementType()->isIntegerTy() ||
   2211       !Context.getTargetInfo().hasBuiltinAtomic(
   2212           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
   2213     return std::make_pair(false, RValue::get(nullptr));
   2214 
   2215   llvm::AtomicRMWInst::BinOp RMWOp;
   2216   switch (BO) {
   2217   case BO_Add:
   2218     RMWOp = llvm::AtomicRMWInst::Add;
   2219     break;
   2220   case BO_Sub:
   2221     if (!IsXLHSInRHSPart)
   2222       return std::make_pair(false, RValue::get(nullptr));
   2223     RMWOp = llvm::AtomicRMWInst::Sub;
   2224     break;
   2225   case BO_And:
   2226     RMWOp = llvm::AtomicRMWInst::And;
   2227     break;
   2228   case BO_Or:
   2229     RMWOp = llvm::AtomicRMWInst::Or;
   2230     break;
   2231   case BO_Xor:
   2232     RMWOp = llvm::AtomicRMWInst::Xor;
   2233     break;
   2234   case BO_LT:
   2235     RMWOp = X.getType()->hasSignedIntegerRepresentation()
   2236                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
   2237                                    : llvm::AtomicRMWInst::Max)
   2238                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
   2239                                    : llvm::AtomicRMWInst::UMax);
   2240     break;
   2241   case BO_GT:
   2242     RMWOp = X.getType()->hasSignedIntegerRepresentation()
   2243                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
   2244                                    : llvm::AtomicRMWInst::Min)
   2245                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
   2246                                    : llvm::AtomicRMWInst::UMin);
   2247     break;
   2248   case BO_Assign:
   2249     RMWOp = llvm::AtomicRMWInst::Xchg;
   2250     break;
   2251   case BO_Mul:
   2252   case BO_Div:
   2253   case BO_Rem:
   2254   case BO_Shl:
   2255   case BO_Shr:
   2256   case BO_LAnd:
   2257   case BO_LOr:
   2258     return std::make_pair(false, RValue::get(nullptr));
   2259   case BO_PtrMemD:
   2260   case BO_PtrMemI:
   2261   case BO_LE:
   2262   case BO_GE:
   2263   case BO_EQ:
   2264   case BO_NE:
   2265   case BO_AddAssign:
   2266   case BO_SubAssign:
   2267   case BO_AndAssign:
   2268   case BO_OrAssign:
   2269   case BO_XorAssign:
   2270   case BO_MulAssign:
   2271   case BO_DivAssign:
   2272   case BO_RemAssign:
   2273   case BO_ShlAssign:
   2274   case BO_ShrAssign:
   2275   case BO_Comma:
   2276     llvm_unreachable("Unsupported atomic update operation");
   2277   }
   2278   auto *UpdateVal = Update.getScalarVal();
   2279   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
   2280     UpdateVal = CGF.Builder.CreateIntCast(
   2281         IC, X.getAddress().getElementType(),
   2282         X.getType()->hasSignedIntegerRepresentation());
   2283   }
   2284   auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
   2285   return std::make_pair(true, RValue::get(Res));
   2286 }
   2287 
   2288 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
   2289     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
   2290     llvm::AtomicOrdering AO, SourceLocation Loc,
   2291     const llvm::function_ref<RValue(RValue)> &CommonGen) {
   2292   // Update expressions are allowed to have the following forms:
   2293   // x binop= expr; -> xrval binop expr;
   2294   // x++, ++x -> xrval + 1;
   2295   // x--, --x -> xrval - 1;
   2296   // x = x binop expr; -> xrval binop expr;
   2297   // x = expr binop x; -> expr binop xrval;
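          //
          // A hypothetical illustration (not from the source): for 'x = expr - x' the
          // value of 'x' is the right operand, so IsXLHSInRHSPart is false and a
          // plain 'atomicrmw sub' cannot be used, whereas 'x = x - expr' has
          // IsXLHSInRHSPart true and can map to 'atomicrmw sub'.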
   2298   auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
   2299   if (!Res.first) {
   2300     if (X.isGlobalReg()) {
   2301       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
   2302       // 'xrval'.
   2303       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
   2304     } else {
   2305       // Perform compare-and-swap procedure.
   2306       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
   2307     }
   2308   }
   2309   return Res;
   2310 }
   2311 
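        // Illustrative sketch (not part of the original source): a construct such as
        //
        //   #pragma omp atomic update
        //   x = expr * x;
        //
        // reaches EmitOMPAtomicUpdateExpr below with UE describing 'expr * x' over
        // opaque placeholders for the values of 'x' and 'expr', and with
        // IsXLHSInRHSPart == false because 'x' is the right operand.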
   2312 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
   2313                                     const Expr *X, const Expr *E,
   2314                                     const Expr *UE, bool IsXLHSInRHSPart,
   2315                                     SourceLocation Loc) {
   2316   assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
   2317          "Update expr in 'atomic update' must be a binary operator.");
   2318   auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
   2319   // Update expressions are allowed to have the following forms:
   2320   // x binop= expr; -> xrval binop expr;
   2321   // x++, ++x -> xrval + 1;
   2322   // x--, --x -> xrval - 1;
   2323   // x = x binop expr; -> xrval binop expr;
   2324   // x = expr binop x; -> expr binop xrval;
   2325   assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
   2326   LValue XLValue = CGF.EmitLValue(X);
   2327   RValue ExprRValue = CGF.EmitAnyExpr(E);
   2328   auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
   2329   auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
   2330   auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
   2331   auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
   2332   auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
   2333   auto Gen =
   2334       [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
   2335         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
   2336         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
   2337         return CGF.EmitAnyExpr(UE);
   2338       };
   2339   (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
   2340       XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
   2341   // OpenMP, 2.12.6, atomic Construct
   2342   // Any atomic construct with a seq_cst clause forces the atomically
   2343   // performed operation to include an implicit flush operation without a
   2344   // list.
   2345   if (IsSeqCst)
   2346     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
   2347 }
   2348 
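        // Converts 'Value' from 'SourceType' to 'ResType'. Only scalar and complex
        // results are supported; an aggregate result type is unreachable here.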
   2349 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
   2350                             QualType SourceType, QualType ResType,
   2351                             SourceLocation Loc) {
   2352   switch (CGF.getEvaluationKind(ResType)) {
   2353   case TEK_Scalar:
   2354     return RValue::get(
   2355         convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
   2356   case TEK_Complex: {
   2357     auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
   2358     return RValue::getComplex(Res.first, Res.second);
   2359   }
   2360   case TEK_Aggregate:
   2361     break;
   2362   }
   2363   llvm_unreachable("Must be a scalar or complex.");
   2364 }
   2365 
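        // Illustrative sketch (not from the original source) of the two shapes
        // handled below:
        //
        //   #pragma omp atomic capture      // UE != nullptr: update 'x', capture it
        //   v = x += expr;
        //
        //   #pragma omp atomic capture      // UE == nullptr: plain write, capture it
        //   { v = x; x = expr; }
        //
        // IsPostfixUpdate selects whether 'v' receives the old value of 'x' (as in
        // 'v = x++;') or the updated value.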
   2366 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
   2367                                      bool IsPostfixUpdate, const Expr *V,
   2368                                      const Expr *X, const Expr *E,
   2369                                      const Expr *UE, bool IsXLHSInRHSPart,
   2370                                      SourceLocation Loc) {
   2371   assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
   2372   assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
   2373   RValue NewVVal;
   2374   LValue VLValue = CGF.EmitLValue(V);
   2375   LValue XLValue = CGF.EmitLValue(X);
   2376   RValue ExprRValue = CGF.EmitAnyExpr(E);
   2377   auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
   2378   QualType NewVValType;
   2379   if (UE) {
   2380     // 'x' is updated with some additional value.
   2381     assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
   2382            "Update expr in 'atomic capture' must be a binary operator.");
   2383     auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
   2384     // Update expressions are allowed to have the following forms:
   2385     // x binop= expr; -> xrval binop expr;
   2386     // x++, ++x -> xrval + 1;
   2387     // x--, --x -> xrval - 1;
   2388     // x = x binop expr; -> xrval binop expr;
   2389     // x = expr binop x; -> expr binop xrval;
   2390     auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
   2391     auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
   2392     auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
   2393     NewVValType = XRValExpr->getType();
   2394     auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
   2395     auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
   2396                   IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
   2397       CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
   2398       CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
   2399       RValue Res = CGF.EmitAnyExpr(UE);
   2400       NewVVal = IsPostfixUpdate ? XRValue : Res;
   2401       return Res;
   2402     };
   2403     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
   2404         XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
   2405     if (Res.first) {
   2406       // 'atomicrmw' instruction was generated.
   2407       if (IsPostfixUpdate) {
   2408         // Use old value from 'atomicrmw'.
   2409         NewVVal = Res.second;
   2410       } else {
   2411         // 'atomicrmw' does not provide new value, so evaluate it using old
   2412         // value of 'x'.
   2413         CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
   2414         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
   2415         NewVVal = CGF.EmitAnyExpr(UE);
   2416       }
   2417     }
   2418   } else {
   2419     // 'x' is simply rewritten with some 'expr'.
   2420     NewVValType = X->getType().getNonReferenceType();
   2421     ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
   2422                                X->getType().getNonReferenceType(), Loc);
   2423     auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
   2424       NewVVal = XRValue;
   2425       return ExprRValue;
   2426     };
   2427     // Try an atomicrmw xchg first, otherwise a compare-and-swap exchange.
   2428     auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
   2429         XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
   2430         Loc, Gen);
   2431     if (Res.first) {
   2432       // 'atomicrmw' instruction was generated.
   2433       NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
   2434     }
   2435   }
   2436   // Emit post-update store to 'v' of old/new 'x' value.
   2437   emitSimpleStore(CGF, VLValue, NewVVal, NewVValType, Loc);
   2438   // OpenMP, 2.12.6, atomic Construct
   2439   // Any atomic construct with a seq_cst clause forces the atomically
   2440   // performed operation to include an implicit flush operation without a
   2441   // list.
   2442   if (IsSeqCst)
   2443     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
   2444 }
   2445 
   2446 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
   2447                               bool IsSeqCst, bool IsPostfixUpdate,
   2448                               const Expr *X, const Expr *V, const Expr *E,
   2449                               const Expr *UE, bool IsXLHSInRHSPart,
   2450                               SourceLocation Loc) {
   2451   switch (Kind) {
   2452   case OMPC_read:
   2453     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
   2454     break;
   2455   case OMPC_write:
   2456     EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
   2457     break;
   2458   case OMPC_unknown:
   2459   case OMPC_update:
   2460     EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
   2461     break;
   2462   case OMPC_capture:
   2463     EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
   2464                              IsXLHSInRHSPart, Loc);
   2465     break;
   2466   case OMPC_if:
   2467   case OMPC_final:
   2468   case OMPC_num_threads:
   2469   case OMPC_private:
   2470   case OMPC_firstprivate:
   2471   case OMPC_lastprivate:
   2472   case OMPC_reduction:
   2473   case OMPC_safelen:
   2474   case OMPC_simdlen:
   2475   case OMPC_collapse:
   2476   case OMPC_default:
   2477   case OMPC_seq_cst:
   2478   case OMPC_shared:
   2479   case OMPC_linear:
   2480   case OMPC_aligned:
   2481   case OMPC_copyin:
   2482   case OMPC_copyprivate:
   2483   case OMPC_flush:
   2484   case OMPC_proc_bind:
   2485   case OMPC_schedule:
   2486   case OMPC_ordered:
   2487   case OMPC_nowait:
   2488   case OMPC_untied:
   2489   case OMPC_threadprivate:
   2490   case OMPC_depend:
   2491   case OMPC_mergeable:
   2492   case OMPC_device:
   2493   case OMPC_threads:
   2494   case OMPC_simd:
   2495   case OMPC_map:
   2496   case OMPC_num_teams:
   2497   case OMPC_thread_limit:
   2498   case OMPC_priority:
   2499   case OMPC_grainsize:
   2500   case OMPC_nogroup:
   2501   case OMPC_num_tasks:
   2502   case OMPC_hint:
   2503     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
   2504   }
   2505 }
   2506 
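        // Illustrative examples (assumptions, not from the source) of how the clause
        // scan below classifies a directive:
        //
        //   #pragma omp atomic capture seq_cst  // IsSeqCst = true, Kind = OMPC_capture
        //   #pragma omp atomic                  // Kind stays OMPC_unknown -> update
        //
        // OMPC_unknown and OMPC_update are handled identically by EmitOMPAtomicExpr.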
   2507 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
   2508   bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
   2509   OpenMPClauseKind Kind = OMPC_unknown;
   2510   for (auto *C : S.clauses()) {
   2511     // Find the first clause other than seq_cst; it selects the atomic form.
   2512     if (C->getClauseKind() != OMPC_seq_cst) {
   2513       Kind = C->getClauseKind();
   2514       break;
   2515     }
   2516   }
   2517 
   2518   const auto *CS =
   2519       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
   2520   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
   2521     enterFullExpression(EWC);
   2522   }
   2523   // Processing for statements under 'atomic capture'.
   2524   if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
   2525     for (const auto *C : Compound->body()) {
   2526       if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
   2527         enterFullExpression(EWC);
   2528       }
   2529     }
   2530   }
   2531 
   2532   LexicalScope Scope(*this, S.getSourceRange());
   2533   auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF) {
   2534     CGF.EmitStopPoint(CS);
   2535     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
   2536                       S.getV(), S.getExpr(), S.getUpdateExpr(),
   2537                       S.isXLHSInRHSPart(), S.getLocStart());
   2538   };
   2539   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
   2540 }
   2541 
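        // Illustrative sketch (not from the original source): for a directive such as
        //
        //   #pragma omp target if(cond) device(dev)
        //   { /* body */ }
        //
        // the body is outlined into a separate function, and the runtime call emitted
        // via emitTargetCall receives the captured variables plus the optional 'if'
        // condition and 'device' expressions gathered below.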
   2542 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
   2543   LexicalScope Scope(*this, S.getSourceRange());
   2544   const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
   2545 
   2546   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
   2547   GenerateOpenMPCapturedVars(CS, CapturedVars);
   2548 
   2549   // Emit target region as a standalone region.
   2550   auto &&CodeGen = [&CS](CodeGenFunction &CGF) {
   2551     CGF.EmitStmt(CS.getCapturedStmt());
   2552   };
   2553 
   2554   // Obtain the outlined function for the target region.
   2555   llvm::Value *Fn =
   2556       CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, CodeGen);
   2557 
   2558   // Check if we have any if clause associated with the directive.
   2559   const Expr *IfCond = nullptr;
   2560 
   2561   if (auto *C = S.getSingleClause<OMPIfClause>()) {
   2562     IfCond = C->getCondition();
   2563   }
   2564 
   2565   // Check if we have any device clause associated with the directive.
   2566   const Expr *Device = nullptr;
   2567   if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
   2568     Device = C->getDevice();
   2569   }
   2570 
   2571   CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, IfCond, Device,
   2572                                         CapturedVars);
   2573 }
   2574 
   2575 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
   2576   llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
   2577 }
   2578 
   2579 void CodeGenFunction::EmitOMPCancellationPointDirective(
   2580     const OMPCancellationPointDirective &S) {
   2581   CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
   2582                                                    S.getCancelRegion());
   2583 }
   2584 
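        // Illustrative note (assumption, not from the source): for
        //
        //   #pragma omp cancel for if(cond)
        //
        // the 'if' condition is forwarded to emitCancelCall below; 'if' clauses whose
        // name modifier names a different directive are ignored.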
   2585 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
   2586   const Expr *IfCond = nullptr;
   2587   for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
   2588     if (C->getNameModifier() == OMPD_unknown ||
   2589         C->getNameModifier() == OMPD_cancel) {
   2590       IfCond = C->getCondition();
   2591       break;
   2592     }
   2593   }
   2594   CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
   2595                                         S.getCancelRegion());
   2596 }
   2597 
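        // Returns the jump destination taken when a cancellation is observed: the
        // function's return block for 'parallel' and 'task' regions, and the
        // innermost break block for loop- and sections-based regions.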
   2598 CodeGenFunction::JumpDest
   2599 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
   2600   if (Kind == OMPD_parallel || Kind == OMPD_task)
   2601     return ReturnBlock;
   2602   assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
   2603          Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for);
   2604   return BreakContinueStack.back().BreakBlock;
   2605 }
   2606 
   2607 // Generate the instructions for the '#pragma omp target data' directive.
   2608 void CodeGenFunction::EmitOMPTargetDataDirective(
   2609     const OMPTargetDataDirective &S) {
   2610   // Emit the code inside the construct for now.
   2611   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
   2612   CGM.getOpenMPRuntime().emitInlinedDirective(
   2613       *this, OMPD_target_data,
   2614       [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
   2615 }
   2616 
   2617 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
   2618   // Emit the code inside the construct for now.
   2619   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
   2620   CGM.getOpenMPRuntime().emitInlinedDirective(
   2621       *this, OMPD_taskloop,
   2622       [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
   2623 }
   2624 
   2625 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
   2626     const OMPTaskLoopSimdDirective &S) {
   2627   // Emit the code inside the construct for now.
   2628   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
   2629   CGM.getOpenMPRuntime().emitInlinedDirective(
   2630       *this, OMPD_taskloop_simd,
   2631       [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
   2632 }
   2633 
   2634