Home | History | Annotate | Download | only in CodeGen
      1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This contains code to emit OpenMP nodes as LLVM code.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "CGOpenMPRuntime.h"
     15 #include "CodeGenFunction.h"
     16 #include "CodeGenModule.h"
     17 #include "TargetInfo.h"
     18 #include "clang/AST/Stmt.h"
     19 #include "clang/AST/StmtOpenMP.h"
     20 using namespace clang;
     21 using namespace CodeGen;
     22 
     23 //===----------------------------------------------------------------------===//
     24 //                              OpenMP Directive Emission
     25 //===----------------------------------------------------------------------===//
     26 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
     27 /// function. Here is the logic:
     28 /// if (Cond) {
     29 ///   CodeGen(true);
     30 /// } else {
     31 ///   CodeGen(false);
     32 /// }
     33 static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
     34                             const std::function<void(bool)> &CodeGen) {
     35   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
     36 
     37   // If the condition constant folds and can be elided, try to avoid emitting
     38   // the condition and the dead arm of the if/else.
     39   bool CondConstant;
     40   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
     41     CodeGen(CondConstant);
     42     return;
     43   }
     44 
     45   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
     46   // emit the conditional branch.
     47   auto ThenBlock = CGF.createBasicBlock(/*name*/ "omp_if.then");
     48   auto ElseBlock = CGF.createBasicBlock(/*name*/ "omp_if.else");
     49   auto ContBlock = CGF.createBasicBlock(/*name*/ "omp_if.end");
     50   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount*/ 0);
     51 
     52   // Emit the 'then' code.
     53   CGF.EmitBlock(ThenBlock);
     54   CodeGen(/*ThenBlock*/ true);
     55   CGF.EmitBranch(ContBlock);
     56   // Emit the 'else' code if present.
     57   {
     58     // There is no need to emit line number for unconditional branch.
     59     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
     60     CGF.EmitBlock(ElseBlock);
     61   }
     62   CodeGen(/*ThenBlock*/ false);
     63   {
     64     // There is no need to emit line number for unconditional branch.
     65     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
     66     CGF.EmitBranch(ContBlock);
     67   }
     68   // Emit the continuation block for code after the if.
     69   CGF.EmitBlock(ContBlock, /*IsFinished*/ true);
     70 }
     71 
// Emits a loop that copies an array element-by-element from \p SrcAddr to
// \p DestAddr, invoking \p CopyGen on each (dest, src) element address pair.
// Used when a flat memcpy is not sufficient (e.g. non-trivial copy/init
// semantics per element).
void CodeGenFunction::EmitOMPAggregateAssign(
    llvm::Value *DestAddr, llvm::Value *SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(llvm::Value *, llvm::Value *)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  auto SrcBegin = SrcAddr;
  auto DestBegin = DestAddr;
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  // NOTE(review): emitArrayLength appears to also update DestBegin to point
  // at the element type — the bitcast of SrcBegin below relies on that.
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin);
  // Cast from pointer to array type to pointer to single element.
  SrcBegin = Builder.CreatePointerBitCastOrAddrSpaceCast(SrcBegin,
                                                         DestBegin->getType());
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely when the array is empty (e.g. zero-length VLA).
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);
  // PHIs track the current source/destination element.  The back-edge
  // incoming values are added below, after the body has been emitted.
  auto SrcElementCurrent =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementCurrent->addIncoming(SrcBegin, EntryBB);
  auto DestElementCurrent = Builder.CreatePHI(DestBegin->getType(), 2,
                                              "omp.arraycpy.destElementPast");
  DestElementCurrent->addIncoming(DestBegin, EntryBB);

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementCurrent, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementCurrent, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Use GetInsertBlock() rather than BodyBB for the back-edge: CopyGen may
  // have created additional blocks, so the branch to the PHIs originates
  // from the current insertion block.
  DestElementCurrent->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementCurrent->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
    120 
    121 void CodeGenFunction::EmitOMPCopy(CodeGenFunction &CGF,
    122                                   QualType OriginalType, llvm::Value *DestAddr,
    123                                   llvm::Value *SrcAddr, const VarDecl *DestVD,
    124                                   const VarDecl *SrcVD, const Expr *Copy) {
    125   if (OriginalType->isArrayType()) {
    126     auto *BO = dyn_cast<BinaryOperator>(Copy);
    127     if (BO && BO->getOpcode() == BO_Assign) {
    128       // Perform simple memcpy for simple copying.
    129       CGF.EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    130     } else {
    131       // For arrays with complex element types perform element by element
    132       // copying.
    133       CGF.EmitOMPAggregateAssign(
    134           DestAddr, SrcAddr, OriginalType,
    135           [&CGF, Copy, SrcVD, DestVD](llvm::Value *DestElement,
    136                                           llvm::Value *SrcElement) {
    137             // Working with the single array element, so have to remap
    138             // destination and source variables to corresponding array
    139             // elements.
    140             CodeGenFunction::OMPPrivateScope Remap(CGF);
    141             Remap.addPrivate(DestVD, [DestElement]() -> llvm::Value *{
    142               return DestElement;
    143             });
    144             Remap.addPrivate(
    145                 SrcVD, [SrcElement]() -> llvm::Value *{ return SrcElement; });
    146             (void)Remap.Privatize();
    147             CGF.EmitIgnoredExpr(Copy);
    148           });
    149     }
    150   } else {
    151     // Remap pseudo source variable to private copy.
    152     CodeGenFunction::OMPPrivateScope Remap(CGF);
    153     Remap.addPrivate(SrcVD, [SrcAddr]() -> llvm::Value *{ return SrcAddr; });
    154     Remap.addPrivate(DestVD, [DestAddr]() -> llvm::Value *{ return DestAddr; });
    155     (void)Remap.Privatize();
    156     // Emit copying of the whole variable.
    157     CGF.EmitIgnoredExpr(Copy);
    158   }
    159 }
    160 
// Emits private copies for 'firstprivate' variables, initialized from the
// captured originals, and registers them in \p PrivateScope.  Returns true
// if at least one firstprivate copy was created (the caller then emits a
// barrier before the copies are used — see EmitOMPParallelDirective).
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  auto FirstprivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_firstprivate;
  };
  // Track originals so a variable listed in several clauses is emitted once.
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
           FirstprivateFilter)> I(D.clauses(), FirstprivateFilter);
       I; ++I) {
    auto *C = cast<OMPFirstprivateClause>(*I);
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsFirstprivate.count(OrigVD) == 0) {
        EmittedAsFirstprivate.insert(OrigVD);
        // VD is the private copy; VDInit is the pseudo-variable inside VD's
        // initializer that stands for the original value.
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        // Build a reference to the original variable as captured in the
        // current region and take its address.
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        auto *OriginalAddr = EmitLValue(&DRE).getAddress();
        if (OrigVD->getType()->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  (*IRef)->getType());
            } else {
              // Non-trivial element initialization: run the initializer once
              // per element, remapping VDInit to the source element address.
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr,
                  (*IRef)->getType(),
                  [this, VDInit, Init](llvm::Value *DestElement,
                                           llvm::Value *SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    LocalDeclMap[VDInit] = SrcElement;
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
            // Emit private VarDecl with copy init.
            // Remap temp VDInit variable to the address of the original
            // variable
            // (for proper handling of captured global variables).
            LocalDeclMap[VDInit] = OriginalAddr;
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef, ++InitsRef;
    }
  }
  return !EmittedAsFirstprivate.empty();
}
    238 
    239 void CodeGenFunction::EmitOMPPrivateClause(
    240     const OMPExecutableDirective &D,
    241     CodeGenFunction::OMPPrivateScope &PrivateScope) {
    242   auto PrivateFilter = [](const OMPClause *C) -> bool {
    243     return C->getClauseKind() == OMPC_private;
    244   };
    245   for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
    246            I(D.clauses(), PrivateFilter); I; ++I) {
    247     auto *C = cast<OMPPrivateClause>(*I);
    248     auto IRef = C->varlist_begin();
    249     for (auto IInit : C->private_copies()) {
    250       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
    251       auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
    252       bool IsRegistered =
    253           PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
    254             // Emit private VarDecl with copy init.
    255             EmitDecl(*VD);
    256             return GetAddrOfLocalVar(VD);
    257           });
    258       assert(IsRegistered && "private var already registered as private");
    259       // Silence the warning about unused variable.
    260       (void)IsRegistered;
    261       ++IRef;
    262     }
    263   }
    264 }
    265 
// Emits copying of threadprivate variables' values from the master thread's
// instances into the instances of all other threads ('copyin' clause).
// Returns true if any copyin code was emitted; the caller is then expected
// to emit a synchronizing barrier.
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  auto CopyinFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_copyin;
  };
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(CopyinFilter)>
           I(D.clauses(), CopyinFilter);
       I; ++I) {
    auto *C = cast<OMPCopyinClause>(*I);
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Copy each variable at most once, even if listed in several clauses.
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable.
        auto *MasterAddr = VD->isStaticLocal()
                               ? CGM.getStaticLocalDeclAddress(VD)
                               : CGM.GetAddrOfGlobal(VD);
        // Get the address of the threadprivate variable.
        auto *PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // On the master thread the threadprivate address equals the master
          // address, so comparing the two pointers identifies the master.
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr, CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr, CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(*this, (*IRef)->getType(), PrivateAddr, MasterAddr, DestVD,
                    SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
    321 
// Prepares 'lastprivate' clause processing before the loop: remaps the
// destination pseudo variables to the original variables' addresses and
// creates the private copies.  Returns true if at least one private copy was
// registered (the caller must then call EmitOMPLastprivateClauseFinal to
// copy the values back after the last iteration).
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  auto LastprivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_lastprivate;
  };
  bool HasAtLeastOneLastprivate = false;
  // Process each variable only once, even if listed in several clauses.
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
           LastprivateFilter)> I(D.clauses(), LastprivateFilter);
       I; ++I) {
    auto *C = cast<OMPLastprivateClause>(*I);
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> llvm::Value *{
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (!IInit)
          continue;
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "lastprivate var already registered as private");
        HasAtLeastOneLastprivate = HasAtLeastOneLastprivate || IsRegistered;
      }
      ++IRef, ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
    369 
// Emits the copy-back of 'lastprivate' values into the original variables,
// guarded by \p IsLastIterCond so it only runs on the thread that executed
// the sequentially last iteration.
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) {
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  auto *ThenBB = createBasicBlock(".omp.lastprivate.then");
  auto *DoneBB = createBasicBlock(".omp.lastprivate.done");
  Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
  EmitBlock(ThenBB);
  {
    auto LastprivateFilter = [](const OMPClause *C) -> bool {
      return C->getClauseKind() == OMPC_lastprivate;
    };
    // Copy each variable only once, even if listed in several clauses
    // (mirrors the de-duplication in EmitOMPLastprivateClauseInit).
    llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
    for (OMPExecutableDirective::filtered_clause_iterator<decltype(
             LastprivateFilter)> I(D.clauses(), LastprivateFilter);
         I; ++I) {
      auto *C = cast<OMPLastprivateClause>(*I);
      auto IRef = C->varlist_begin();
      auto ISrcRef = C->source_exprs().begin();
      auto IDestRef = C->destination_exprs().begin();
      for (auto *AssignOp : C->assignment_ops()) {
        auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
        if (AlreadyEmittedVars.insert(PrivateVD->getCanonicalDecl()).second) {
          auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
          auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
          // Get the address of the original variable.
          auto *OriginalAddr = GetAddrOfLocalVar(DestVD);
          // Get the address of the private variable.
          auto *PrivateAddr = GetAddrOfLocalVar(PrivateVD);
          EmitOMPCopy(*this, (*IRef)->getType(), OriginalAddr, PrivateAddr,
                      DestVD, SrcVD, AssignOp);
        }
        ++IRef;
        ++ISrcRef;
        ++IDestRef;
      }
    }
  }
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
    414 
    415 void CodeGenFunction::EmitOMPReductionClauseInit(
    416     const OMPExecutableDirective &D,
    417     CodeGenFunction::OMPPrivateScope &PrivateScope) {
    418   auto ReductionFilter = [](const OMPClause *C) -> bool {
    419     return C->getClauseKind() == OMPC_reduction;
    420   };
    421   for (OMPExecutableDirective::filtered_clause_iterator<decltype(
    422            ReductionFilter)> I(D.clauses(), ReductionFilter);
    423        I; ++I) {
    424     auto *C = cast<OMPReductionClause>(*I);
    425     auto ILHS = C->lhs_exprs().begin();
    426     auto IRHS = C->rhs_exprs().begin();
    427     for (auto IRef : C->varlists()) {
    428       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
    429       auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    430       auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    431       // Store the address of the original variable associated with the LHS
    432       // implicit variable.
    433       PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> llvm::Value *{
    434         DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
    435                         CapturedStmtInfo->lookup(OrigVD) != nullptr,
    436                         IRef->getType(), VK_LValue, IRef->getExprLoc());
    437         return EmitLValue(&DRE).getAddress();
    438       });
    439       // Emit reduction copy.
    440       bool IsRegistered =
    441           PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> llvm::Value *{
    442             // Emit private VarDecl with reduction init.
    443             EmitDecl(*PrivateVD);
    444             return GetAddrOfLocalVar(PrivateVD);
    445           });
    446       assert(IsRegistered && "private var already registered as private");
    447       // Silence the warning about unused variable.
    448       (void)IsRegistered;
    449       ++ILHS, ++IRHS;
    450     }
    451   }
    452 }
    453 
    454 void CodeGenFunction::EmitOMPReductionClauseFinal(
    455     const OMPExecutableDirective &D) {
    456   llvm::SmallVector<const Expr *, 8> LHSExprs;
    457   llvm::SmallVector<const Expr *, 8> RHSExprs;
    458   llvm::SmallVector<const Expr *, 8> ReductionOps;
    459   auto ReductionFilter = [](const OMPClause *C) -> bool {
    460     return C->getClauseKind() == OMPC_reduction;
    461   };
    462   bool HasAtLeastOneReduction = false;
    463   for (OMPExecutableDirective::filtered_clause_iterator<decltype(
    464            ReductionFilter)> I(D.clauses(), ReductionFilter);
    465        I; ++I) {
    466     HasAtLeastOneReduction = true;
    467     auto *C = cast<OMPReductionClause>(*I);
    468     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    469     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    470     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    471   }
    472   if (HasAtLeastOneReduction) {
    473     // Emit nowait reduction if nowait clause is present or directive is a
    474     // parallel directive (it always has implicit barrier).
    475     CGM.getOpenMPRuntime().emitReduction(
    476         *this, D.getLocEnd(), LHSExprs, RHSExprs, ReductionOps,
    477         D.getSingleClause(OMPC_nowait) ||
    478             isOpenMPParallelDirective(D.getDirectiveKind()));
    479   }
    480 }
    481 
    482 /// \brief Emits code for OpenMP parallel directive in the parallel region.
    483 static void emitOMPParallelCall(CodeGenFunction &CGF,
    484                                 const OMPExecutableDirective &S,
    485                                 llvm::Value *OutlinedFn,
    486                                 llvm::Value *CapturedStruct) {
    487   if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) {
    488     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    489     auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
    490     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
    491                                          /*IgnoreResultAssign*/ true);
    492     CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
    493         CGF, NumThreads, NumThreadsClause->getLocStart());
    494   }
    495   CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
    496                                               CapturedStruct);
    497 }
    498 
    499 static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
    500                                            const OMPExecutableDirective &S,
    501                                            const RegionCodeGenTy &CodeGen) {
    502   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
    503   auto CapturedStruct = CGF.GenerateCapturedStmtArgument(*CS);
    504   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
    505       S, *CS->getCapturedDecl()->param_begin(), CodeGen);
    506   if (auto C = S.getSingleClause(/*K*/ OMPC_if)) {
    507     auto Cond = cast<OMPIfClause>(C)->getCondition();
    508     EmitOMPIfClause(CGF, Cond, [&](bool ThenBlock) {
    509       if (ThenBlock)
    510         emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
    511       else
    512         CGF.CGM.getOpenMPRuntime().emitSerialCall(CGF, S.getLocStart(),
    513                                                   OutlinedFn, CapturedStruct);
    514     });
    515   } else
    516     emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
    517 }
    518 
    519 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
    520   LexicalScope Scope(*this, S.getSourceRange());
    521   // Emit parallel region as a standalone region.
    522   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    523     OMPPrivateScope PrivateScope(CGF);
    524     bool Copyins = CGF.EmitOMPCopyinClause(S);
    525     bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    526     if (Copyins || Firstprivates) {
    527       // Emit implicit barrier to synchronize threads and avoid data races on
    528       // initialization of firstprivate variables or propagation master's thread
    529       // values of threadprivate variables to local instances of that variables
    530       // of all other implicit threads.
    531       CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
    532                                                  OMPD_unknown);
    533     }
    534     CGF.EmitOMPPrivateClause(S, PrivateScope);
    535     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    536     (void)PrivateScope.Privatize();
    537     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    538     CGF.EmitOMPReductionClauseFinal(S);
    539     // Emit implicit barrier at the end of the 'parallel' directive.
    540     CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
    541                                                OMPD_unknown);
    542   };
    543   emitCommonOMPParallelDirective(*this, S, CodeGen);
    544 }
    545 
    546 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
    547                                       bool SeparateIter) {
    548   RunCleanupsScope BodyScope(*this);
    549   // Update counters values on current iteration.
    550   for (auto I : S.updates()) {
    551     EmitIgnoredExpr(I);
    552   }
    553   // Update the linear variables.
    554   for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    555     for (auto U : C->updates()) {
    556       EmitIgnoredExpr(U);
    557     }
    558   }
    559 
    560   // On a continue in the body, jump to the end.
    561   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
    562   BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue));
    563   // Emit loop body.
    564   EmitStmt(S.getBody());
    565   // The end (updates/cleanups).
    566   EmitBlock(Continue.getBlock());
    567   BreakContinueStack.pop_back();
    568   if (SeparateIter) {
    569     // TODO: Update lastprivates if the SeparateIter flag is true.
    570     // This will be implemented in a follow-up OMPLastprivateClause patch, but
    571     // result should be still correct without it, as we do not make these
    572     // variables private yet.
    573   }
    574 }
    575 
// Emits the inner loop of an OpenMP loop construct as
//   while (LoopCond) { BodyGen(*this); IncExpr; }
// lowered into cond/body/inc blocks, with an extra staging block when the
// enclosing scope has cleanups that must run on loop exit.
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
  auto Cnt = getPGORegionCounter(&S);

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, Cnt.getCount());
  if (ExitBlock != LoopExit.getBlock()) {
    // Route the exit through the staging block so pending cleanups run.
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  Cnt.beginRegion(Builder);

  // Create a block for the increment; 'continue' inside the body jumps here.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
    621 
    622 void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) {
    623   auto IC = S.counters().begin();
    624   for (auto F : S.finals()) {
    625     if (LocalDeclMap.lookup(cast<DeclRefExpr>((*IC))->getDecl())) {
    626       EmitIgnoredExpr(F);
    627     }
    628     ++IC;
    629   }
    630   // Emit the final values of the linear variables.
    631   for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    632     for (auto F : C->finals()) {
    633       EmitIgnoredExpr(F);
    634     }
    635   }
    636 }
    637 
    638 static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM,
    639                                  const OMPAlignedClause &Clause) {
    640   unsigned ClauseAlignment = 0;
    641   if (auto AlignmentExpr = Clause.getAlignment()) {
    642     auto AlignmentCI =
    643         cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
    644     ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    645   }
    646   for (auto E : Clause.varlists()) {
    647     unsigned Alignment = ClauseAlignment;
    648     if (Alignment == 0) {
    649       // OpenMP [2.8.1, Description]
    650       // If no optional parameter is specified, implementation-defined default
    651       // alignments for SIMD instructions on the target platforms are assumed.
    652       Alignment = CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment(
    653           E->getType());
    654     }
    655     assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
    656            "alignment is not power of 2");
    657     if (Alignment != 0) {
    658       llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
    659       CGF.EmitAlignmentAssumption(PtrValue, Alignment);
    660     }
    661   }
    662 }
    663 
    664 static void EmitPrivateLoopCounters(CodeGenFunction &CGF,
    665                                     CodeGenFunction::OMPPrivateScope &LoopScope,
    666                                     ArrayRef<Expr *> Counters) {
    667   for (auto *E : Counters) {
    668     auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    669     bool IsRegistered = LoopScope.addPrivate(VD, [&]() -> llvm::Value * {
    670       // Emit var without initialization.
    671       auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
    672       CGF.EmitAutoVarCleanups(VarEmission);
    673       return VarEmission.getAllocatedAddress();
    674     });
    675     assert(IsRegistered && "counter already registered as private");
    676     // Silence the warning about unused variable.
    677     (void)IsRegistered;
    678   }
    679 }
    680 
    681 static void
    682 EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
    683                       CodeGenFunction::OMPPrivateScope &PrivateScope) {
    684   for (auto Clause : OMPExecutableDirective::linear_filter(D.clauses())) {
    685     for (auto *E : Clause->varlists()) {
    686       auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    687       bool IsRegistered = PrivateScope.addPrivate(VD, [&]()->llvm::Value * {
    688         // Emit var without initialization.
    689         auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
    690         CGF.EmitAutoVarCleanups(VarEmission);
    691         return VarEmission.getAllocatedAddress();
    692       });
    693       assert(IsRegistered && "linear var already registered as private");
    694       // Silence the warning about unused variable.
    695       (void)IsRegistered;
    696     }
    697   }
    698 }
    699 
// Lower '#pragma omp simd'. The loop body is emitted inline (through the
// runtime's inlined-directive hook) with vectorization hints recorded on
// LoopStack so they end up as loop metadata.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    // Pragma 'simd' code depends on presence of 'lastprivate'.
    // If present, we have to separate last iteration of the loop:
    //
    // if (LastIteration != 0) {
    //   for (IV in 0..LastIteration-1) BODY;
    //   BODY with updates of lastprivate vars;
    //   <Final counter/linear vars updates>;
    // }
    //
    // otherwise (when there's no lastprivate):
    //
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    //

    // Walk clauses and process safelen/lastprivate.
    bool SeparateIter = false;
    CGF.LoopStack.setParallel();
    CGF.LoopStack.setVectorizerEnable(true);
    for (auto C : S.clauses()) {
      switch (C->getClauseKind()) {
      case OMPC_safelen: {
        // Sema guarantees the safelen expression is an integer constant.
        RValue Len = CGF.EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
                                     AggValueSlot::ignored(), true);
        llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
        CGF.LoopStack.setVectorizerWidth(Val->getZExtValue());
        // In presence of finite 'safelen', it may be unsafe to mark all
        // the memory instructions parallel, because loop-carried
        // dependences of 'safelen' iterations are possible.
        CGF.LoopStack.setParallel(false);
        break;
      }
      case OMPC_aligned:
        EmitOMPAlignedClause(CGF, CGF.CGM, cast<OMPAlignedClause>(*C));
        break;
      case OMPC_lastprivate:
        // Forces the peeled-last-iteration form described above.
        SeparateIter = true;
        break;
      default:
        // Not handled yet
        ;
      }
    }

    // Emit inits for the linear variables.
    for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
      for (auto Init : C->inits()) {
        auto *D = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
        CGF.EmitVarDecl(*D);
      }
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each
    // iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
      if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
        if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
          CGF.EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
          // Emit calculation of the linear step.
          CGF.EmitIgnoredExpr(CS);
        }
    }

    if (SeparateIter) {
      // Emit: if (LastIteration > 0) - begin.
      RegionCounter Cnt = CGF.getPGORegionCounter(&S);
      auto ThenBlock = CGF.createBasicBlock("simd.if.then");
      auto ContBlock = CGF.createBasicBlock("simd.if.end");
      CGF.EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock,
                               Cnt.getCount());
      CGF.EmitBlock(ThenBlock);
      Cnt.beginRegion(CGF.Builder);
      // Emit 'then' code.
      {
        // Private copies of counters/linear/private vars live only for the
        // duration of this scope.
        OMPPrivateScope LoopScope(CGF);
        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
        EmitPrivateLinearVars(CGF, S, LoopScope);
        CGF.EmitOMPPrivateClause(S, LoopScope);
        (void)LoopScope.Privatize();
        // Main loop stops one iteration early (SeparateIter condition)...
        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                             S.getCond(/*SeparateIter=*/true), S.getInc(),
                             [&S](CodeGenFunction &CGF) {
                               CGF.EmitOMPLoopBody(S);
                               CGF.EmitStopPoint(&S);
                             });
        // ...and the peeled last iteration runs here so lastprivate updates
        // observe the final values.
        CGF.EmitOMPLoopBody(S, /* SeparateIter */ true);
      }
      CGF.EmitOMPSimdFinal(S);
      // Emit: if (LastIteration != 0) - end.
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    } else {
      {
        OMPPrivateScope LoopScope(CGF);
        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
        EmitPrivateLinearVars(CGF, S, LoopScope);
        CGF.EmitOMPPrivateClause(S, LoopScope);
        (void)LoopScope.Privatize();
        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                             S.getCond(/*SeparateIter=*/false), S.getInc(),
                             [&S](CodeGenFunction &CGF) {
                               CGF.EmitOMPLoopBody(S);
                               CGF.EmitStopPoint(&S);
                             });
      }
      CGF.EmitOMPSimdFinal(S);
    }
  };
  // 'simd' is not a worksharing construct; no runtime calls are needed, so
  // the region is emitted inline.
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
}
    828 
// Emit the outer "dispatch" loop of a worksharing 'for' whose schedule
// requires requesting work chunks from the runtime: every dynamic-family
// schedule and static with an explicit chunk. Static non-chunked loops are
// handled without an outer loop by the caller (see the assert below).
void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                          const OMPLoopDirective &S,
                                          OMPPrivateScope &LoopScope,
                                          llvm::Value *LB, llvm::Value *UB,
                                          llvm::Value *ST, llvm::Value *IL,
                                          llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool Dynamic = RT.isDynamic(ScheduleKind);

  assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  // Initialize the dispatch: dynamic schedules get the global iteration count
  // as the upper bound, static chunked schedules get the UB address to fill.
  RT.emitForInit(
      *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
      (Dynamic ? EmitAnyExpr(S.getLastIteration()).getScalarVal() : UB), ST,
      Chunk);

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // Whether another chunk is available: computed from LB/UB for static
  // chunked schedules, returned by the runtime for dynamic ones.
  llvm::Value *BoolCondVal = nullptr;
  if (!Dynamic) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond(false));
  } else {
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
                                    IL, LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (Dynamic)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // The inner loop walks the current chunk [LB..UB].
  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                   S.getCond(/*SeparateIter=*/false), S.getInc(),
                   [&S](CodeGenFunction &CGF) {
                     CGF.EmitOMPLoopBody(S);
                     CGF.EmitStopPoint(&S);
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!Dynamic) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  // FIXME: Also call fini for ordered loops with dynamic scheduling.
  if (!Dynamic)
    RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
}
    968 
    969 /// \brief Emit a helper variable and return corresponding lvalue.
    970 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
    971                                const DeclRefExpr *Helper) {
    972   auto VDecl = cast<VarDecl>(Helper->getDecl());
    973   CGF.EmitVarDecl(*VDecl);
    974   return CGF.EmitLValue(Helper);
    975 }
    976 
// Emit the loop part of '#pragma omp for' (also reused by the combined
// 'parallel for'). Returns true if the directive carries a 'lastprivate'
// clause, so callers can decide whether an implicit barrier is still needed.
bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Assigned unconditionally inside the scope below; declared here so the
  // value survives past the scope for the return.
  bool HasLastprivateClause;
  // Check pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
    RegionCounter Cnt = getPGORegionCounter(&S);
    auto ThenBlock = createBasicBlock("omp.precond.then");
    auto ContBlock = createBasicBlock("omp.precond.end");
    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
    EmitBlock(ThenBlock);
    Cnt.beginRegion(Builder);
    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                               OMPD_unknown);
      }
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      auto ScheduleKind = OMPC_SCHEDULE_unknown;
      llvm::Value *Chunk = nullptr;
      if (auto C = cast_or_null<OMPScheduleClause>(
              S.getSingleClause(OMPC_schedule))) {
        ScheduleKind = C->getScheduleKind();
        if (auto Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          // The chunk expression may have a different type than the IV.
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided into
        // chunks that are approximately equal in size, and at most one chunk is
        // distributed to each thread. Note that the size of the chunks is
        // unspecified in this case.
        RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                       IL.getAddress(), LB.getAddress(), UB.getAddress(),
                       ST.getAddress());
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                         S.getCond(/*SeparateIter=*/false), S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S);
                           CGF.EmitStopPoint(&S);
                         });
        // Tell the runtime we are done.
        RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
                            UB.getAddress(), ST.getAddress(), IL.getAddress(),
                            Chunk);
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }
    // We're now done with the loop, so jump to the continuation block.
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  }
  return HasLastprivateClause;
}
   1082 
   1083 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
   1084   LexicalScope Scope(*this, S.getSourceRange());
   1085   bool HasLastprivates = false;
   1086   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
   1087     HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
   1088   };
   1089   CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
   1090 
   1091   // Emit an implicit barrier at the end.
   1092   if (!S.getSingleClause(OMPC_nowait) || HasLastprivates) {
   1093     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
   1094   }
   1095 }
   1096 
// TODO: 'omp for simd' is parsed and sema-checked but has no IR emission
// yet; reaching this function indicates unimplemented codegen support.
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
}
   1100 
   1101 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
   1102                                 const Twine &Name,
   1103                                 llvm::Value *Init = nullptr) {
   1104   auto LVal = CGF.MakeNaturalAlignAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
   1105   if (Init)
   1106     CGF.EmitScalarInit(Init, LVal);
   1107   return LVal;
   1108 }
   1109 
// Lower the sections part of '#pragma omp sections' / 'parallel sections'.
// With more than one section, a static non-chunked worksharing loop over the
// section indices is emitted, dispatching to each section via a switch; with
// a single section the construct degenerates to a 'single' region. Returns
// the directive kind that was actually emitted so the caller can emit the
// matching implicit barrier.
static OpenMPDirectiveKind emitSections(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  if (CS && CS->size() > 1) {
    auto &&CodeGen = [&S, CS](CodeGenFunction &CGF) {
      auto &C = CGF.CGM.getContext();
      auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
      // Emit helper vars inits.
      LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                    CGF.Builder.getInt32(0));
      // Iteration space is [0 .. number-of-sections - 1].
      auto *GlobalUBVal = CGF.Builder.getInt32(CS->size() - 1);
      LValue UB =
          createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
      LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                    CGF.Builder.getInt32(1));
      LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                    CGF.Builder.getInt32(0));
      // Loop counter.
      LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
      // Synthesize AST expressions on the stack so EmitOMPInnerLoop can
      // evaluate the condition and increment; the opaque-value mappings bind
      // them to the IV/UB lvalues created above.
      OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
      OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
      // Generate condition for loop.
      BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                          OK_Ordinary, S.getLocStart(),
                          /*fpContractable=*/false);
      // Increment for loop counter.
      UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue,
                        OK_Ordinary, S.getLocStart());
      auto BodyGen = [CS, &S, &IV](CodeGenFunction &CGF) {
        // Iterate through all sections and emit a switch construct:
        // switch (IV) {
        //   case 0:
        //     <SectionStmt[0]>;
        //     break;
        // ...
        //   case <NumSection> - 1:
        //     <SectionStmt[<NumSection> - 1]>;
        //     break;
        // }
        // .omp.sections.exit:
        auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
        auto *SwitchStmt = CGF.Builder.CreateSwitch(
            CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
            CS->size());
        unsigned CaseNumber = 0;
        for (auto C = CS->children(); C; ++C, ++CaseNumber) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(*C);
          CGF.EmitBranch(ExitBB);
        }
        CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
      };
      // Emit static non-chunked loop.
      CGF.CGM.getOpenMPRuntime().emitForInit(
          CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
          /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(),
          ST.getAddress());
      // UB = min(UB, GlobalUB);
      auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
      auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
          CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
      CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
      // IV = LB;
      CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
      // while (idx <= UB) { BODY; ++idx; }
      CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen);
      // Tell the runtime we are done.
      CGF.CGM.getOpenMPRuntime().emitForFinish(CGF, S.getLocStart(),
                                               OMPC_SCHEDULE_static);
    };

    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, CodeGen);
    return OMPD_sections;
  }
  // If only one section is found - no need to generate loop, emit as a single
  // region.
  auto &&CodeGen = [Stmt](CodeGenFunction &CGF) {
    CGF.EmitStmt(Stmt);
    CGF.EnsureInsertPoint();
  };
  CGF.CGM.getOpenMPRuntime().emitSingleRegion(CGF, CodeGen, S.getLocStart(),
                                              llvm::None, llvm::None,
                                              llvm::None, llvm::None);
  return OMPD_single;
}
   1200 
   1201 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
   1202   LexicalScope Scope(*this, S.getSourceRange());
   1203   OpenMPDirectiveKind EmittedAs = emitSections(*this, S);
   1204   // Emit an implicit barrier at the end.
   1205   if (!S.getSingleClause(OMPC_nowait)) {
   1206     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs);
   1207   }
   1208 }
   1209 
   1210 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
   1211   LexicalScope Scope(*this, S.getSourceRange());
   1212   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
   1213     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
   1214     CGF.EnsureInsertPoint();
   1215   };
   1216   CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
   1217 }
   1218 
   1219 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
   1220   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
   1221   llvm::SmallVector<const Expr *, 8> DestExprs;
   1222   llvm::SmallVector<const Expr *, 8> SrcExprs;
   1223   llvm::SmallVector<const Expr *, 8> AssignmentOps;
   1224   // Check if there are any 'copyprivate' clauses associated with this
   1225   // 'single'
   1226   // construct.
   1227   auto CopyprivateFilter = [](const OMPClause *C) -> bool {
   1228     return C->getClauseKind() == OMPC_copyprivate;
   1229   };
   1230   // Build a list of copyprivate variables along with helper expressions
   1231   // (<source>, <destination>, <destination>=<source> expressions)
   1232   typedef OMPExecutableDirective::filtered_clause_iterator<decltype(
   1233       CopyprivateFilter)> CopyprivateIter;
   1234   for (CopyprivateIter I(S.clauses(), CopyprivateFilter); I; ++I) {
   1235     auto *C = cast<OMPCopyprivateClause>(*I);
   1236     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
   1237     DestExprs.append(C->destination_exprs().begin(),
   1238                      C->destination_exprs().end());
   1239     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
   1240     AssignmentOps.append(C->assignment_ops().begin(),
   1241                          C->assignment_ops().end());
   1242   }
   1243   LexicalScope Scope(*this, S.getSourceRange());
   1244   // Emit code for 'single' region along with 'copyprivate' clauses
   1245   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
   1246     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
   1247     CGF.EnsureInsertPoint();
   1248   };
   1249   CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
   1250                                           CopyprivateVars, DestExprs, SrcExprs,
   1251                                           AssignmentOps);
   1252   // Emit an implicit barrier at the end.
   1253   if (!S.getSingleClause(OMPC_nowait)) {
   1254     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_single);
   1255   }
   1256 }
   1257 
   1258 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
   1259   LexicalScope Scope(*this, S.getSourceRange());
   1260   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
   1261     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
   1262     CGF.EnsureInsertPoint();
   1263   };
   1264   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
   1265 }
   1266 
   1267 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
   1268   LexicalScope Scope(*this, S.getSourceRange());
   1269   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
   1270     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
   1271     CGF.EnsureInsertPoint();
   1272   };
   1273   CGM.getOpenMPRuntime().emitCriticalRegion(
   1274       *this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart());
   1275 }
   1276 
   1277 void CodeGenFunction::EmitOMPParallelForDirective(
   1278     const OMPParallelForDirective &S) {
   1279   // Emit directive as a combined directive that consists of two implicit
   1280   // directives: 'parallel' with 'for' directive.
   1281   LexicalScope Scope(*this, S.getSourceRange());
   1282   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
   1283     CGF.EmitOMPWorksharingLoop(S);
   1284     // Emit implicit barrier at the end of parallel region, but this barrier
   1285     // is at the end of 'for' directive, so emit it as the implicit barrier for
   1286     // this 'for' directive.
   1287     CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
   1288                                                OMPD_parallel);
   1289   };
   1290   emitCommonOMPParallelDirective(*this, S, CodeGen);
   1291 }
   1292 
// TODO: implement codegen for 'omp parallel for simd'; reaching this emitter
// before support lands is a front-end bug, hence the unreachable.
void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel for simd' is not supported yet.");
}
   1297 
   1298 void CodeGenFunction::EmitOMPParallelSectionsDirective(
   1299     const OMPParallelSectionsDirective &S) {
   1300   // Emit directive as a combined directive that consists of two implicit
   1301   // directives: 'parallel' with 'sections' directive.
   1302   LexicalScope Scope(*this, S.getSourceRange());
   1303   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
   1304     (void)emitSections(CGF, S);
   1305     // Emit implicit barrier at the end of parallel region.
   1306     CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
   1307                                                OMPD_parallel);
   1308   };
   1309   emitCommonOMPParallelDirective(*this, S, CodeGen);
   1310 }
   1311 
/// \brief Emit code for 'omp task' directive: outline the associated statement
/// into a task entry function and emit the runtime call that schedules it.
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  LexicalScope Scope(*this, S.getSourceRange());
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  // Aggregate holding the captured variables, passed to the outlined function.
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  auto &&CodeGen = [PartId, &S](CodeGenFunction &CGF) {
    if (*PartId) {
      // TODO: emit code for untied tasks.
    }
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  auto OutlinedFn =
      CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, CodeGen);
  // Check if we should emit tied or untied task.
  bool Tied = !S.getSingleClause(OMPC_untied);
  // Check if the task is final: either a constant-folded flag (the int part)
  // or a runtime boolean value (the pointer part).
  llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
  if (auto *Clause = S.getSingleClause(OMPC_final)) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = cast<OMPFinalClause>(Clause)->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Final.setInt(CondConstant);
    else
      Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Final.setInt(/*IntVal=*/false);
  }
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), Tied, Final,
                                      OutlinedFn, SharedsTy, CapturedStruct);
}
   1350 
/// \brief Emit code for 'omp taskyield' directive by delegating to the
/// OpenMP runtime.
void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}
   1355 
/// \brief Emit code for 'omp barrier' directive: an explicit runtime barrier
/// call tagged with the 'barrier' directive kind.
void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}
   1359 
// TODO: implement codegen for 'omp taskwait'; reaching this emitter before
// support lands is a front-end bug, hence the unreachable.
void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) {
  llvm_unreachable("CodeGen for 'omp taskwait' is not supported yet.");
}
   1363 
   1364 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
   1365   CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
   1366     if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) {
   1367       auto FlushClause = cast<OMPFlushClause>(C);
   1368       return llvm::makeArrayRef(FlushClause->varlist_begin(),
   1369                                 FlushClause->varlist_end());
   1370     }
   1371     return llvm::None;
   1372   }(), S.getLocStart());
   1373 }
   1374 
// TODO: implement codegen for 'omp ordered'; reaching this emitter before
// support lands is a front-end bug, hence the unreachable.
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &) {
  llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
}
   1378 
   1379 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
   1380                                          QualType SrcType, QualType DestType) {
   1381   assert(CGF.hasScalarEvaluationKind(DestType) &&
   1382          "DestType must have scalar evaluation kind.");
   1383   assert(!Val.isAggregate() && "Must be a scalar or complex.");
   1384   return Val.isScalar()
   1385              ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType)
   1386              : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
   1387                                                  DestType);
   1388 }
   1389 
   1390 static CodeGenFunction::ComplexPairTy
   1391 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
   1392                       QualType DestType) {
   1393   assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
   1394          "DestType must have complex evaluation kind.");
   1395   CodeGenFunction::ComplexPairTy ComplexVal;
   1396   if (Val.isScalar()) {
   1397     // Convert the input element to the element type of the complex.
   1398     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
   1399     auto ScalarVal =
   1400         CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType);
   1401     ComplexVal = CodeGenFunction::ComplexPairTy(
   1402         ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
   1403   } else {
   1404     assert(Val.isComplex() && "Must be a scalar or complex.");
   1405     auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
   1406     auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
   1407     ComplexVal.first = CGF.EmitScalarConversion(
   1408         Val.getComplexVal().first, SrcElementType, DestElementType);
   1409     ComplexVal.second = CGF.EmitScalarConversion(
   1410         Val.getComplexVal().second, SrcElementType, DestElementType);
   1411   }
   1412   return ComplexVal;
   1413 }
   1414 
/// \brief Emit code for 'omp atomic read': 'v = x;' where the load of 'x' is
/// performed atomically.
static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  // A global-register lvalue cannot be loaded atomically; use a plain load.
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(XLValue, Loc,
                                        IsSeqCst ? llvm::SequentiallyConsistent
                                                 : llvm::Monotonic,
                                        XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  // Store the loaded value into 'v', converting between the types of 'x' and
  // 'v' when they differ.
  switch (CGF.getEvaluationKind(V->getType())) {
  case TEK_Scalar:
    CGF.EmitStoreOfScalar(
        convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue);
    break;
  case TEK_Complex:
    CGF.EmitStoreOfComplex(
        convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}
   1449 
   1450 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
   1451                                    const Expr *X, const Expr *E,
   1452                                    SourceLocation Loc) {
   1453   // x = expr;
   1454   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
   1455   LValue XLValue = CGF.EmitLValue(X);
   1456   RValue ExprRValue = CGF.EmitAnyExpr(E);
   1457   if (XLValue.isGlobalReg())
   1458     CGF.EmitStoreThroughGlobalRegLValue(ExprRValue, XLValue);
   1459   else
   1460     CGF.EmitAtomicStore(ExprRValue, XLValue,
   1461                         IsSeqCst ? llvm::SequentiallyConsistent
   1462                                  : llvm::Monotonic,
   1463                         XLValue.isVolatile(), /*IsInit=*/false);
   1464   // OpenMP, 2.12.6, atomic Construct
   1465   // Any atomic construct with a seq_cst clause forces the atomically
   1466   // performed operation to include an implicit flush operation without a
   1467   // list.
   1468   if (IsSeqCst)
   1469     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
   1470 }
   1471 
   1472 bool emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, RValue Update,
   1473                       BinaryOperatorKind BO, llvm::AtomicOrdering AO,
   1474                       bool IsXLHSInRHSPart) {
   1475   auto &Context = CGF.CGM.getContext();
   1476   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
   1477   // expression is simple and atomic is allowed for the given type for the
   1478   // target platform.
   1479   if (BO == BO_Comma || !Update.isScalar() ||
   1480       !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
   1481       (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
   1482        (Update.getScalarVal()->getType() !=
   1483         X.getAddress()->getType()->getPointerElementType())) ||
   1484       !Context.getTargetInfo().hasBuiltinAtomic(
   1485           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
   1486     return false;
   1487 
   1488   llvm::AtomicRMWInst::BinOp RMWOp;
   1489   switch (BO) {
   1490   case BO_Add:
   1491     RMWOp = llvm::AtomicRMWInst::Add;
   1492     break;
   1493   case BO_Sub:
   1494     if (!IsXLHSInRHSPart)
   1495       return false;
   1496     RMWOp = llvm::AtomicRMWInst::Sub;
   1497     break;
   1498   case BO_And:
   1499     RMWOp = llvm::AtomicRMWInst::And;
   1500     break;
   1501   case BO_Or:
   1502     RMWOp = llvm::AtomicRMWInst::Or;
   1503     break;
   1504   case BO_Xor:
   1505     RMWOp = llvm::AtomicRMWInst::Xor;
   1506     break;
   1507   case BO_LT:
   1508     RMWOp = X.getType()->hasSignedIntegerRepresentation()
   1509                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
   1510                                    : llvm::AtomicRMWInst::Max)
   1511                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
   1512                                    : llvm::AtomicRMWInst::UMax);
   1513     break;
   1514   case BO_GT:
   1515     RMWOp = X.getType()->hasSignedIntegerRepresentation()
   1516                 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
   1517                                    : llvm::AtomicRMWInst::Min)
   1518                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
   1519                                    : llvm::AtomicRMWInst::UMin);
   1520     break;
   1521   case BO_Mul:
   1522   case BO_Div:
   1523   case BO_Rem:
   1524   case BO_Shl:
   1525   case BO_Shr:
   1526   case BO_LAnd:
   1527   case BO_LOr:
   1528     return false;
   1529   case BO_PtrMemD:
   1530   case BO_PtrMemI:
   1531   case BO_LE:
   1532   case BO_GE:
   1533   case BO_EQ:
   1534   case BO_NE:
   1535   case BO_Assign:
   1536   case BO_AddAssign:
   1537   case BO_SubAssign:
   1538   case BO_AndAssign:
   1539   case BO_OrAssign:
   1540   case BO_XorAssign:
   1541   case BO_MulAssign:
   1542   case BO_DivAssign:
   1543   case BO_RemAssign:
   1544   case BO_ShlAssign:
   1545   case BO_ShrAssign:
   1546   case BO_Comma:
   1547     llvm_unreachable("Unsupported atomic update operation");
   1548   }
   1549   auto *UpdateVal = Update.getScalarVal();
   1550   if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
   1551     UpdateVal = CGF.Builder.CreateIntCast(
   1552         IC, X.getAddress()->getType()->getPointerElementType(),
   1553         X.getType()->hasSignedIntegerRepresentation());
   1554   }
   1555   CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
   1556   return true;
   1557 }
   1558 
   1559 void CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
   1560     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
   1561     llvm::AtomicOrdering AO, SourceLocation Loc,
   1562     const llvm::function_ref<RValue(RValue)> &CommonGen) {
   1563   // Update expressions are allowed to have the following forms:
   1564   // x binop= expr; -> xrval + expr;
   1565   // x++, ++x -> xrval + 1;
   1566   // x--, --x -> xrval - 1;
   1567   // x = x binop expr; -> xrval binop expr
   1568   // x = expr Op x; - > expr binop xrval;
   1569   if (!emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart)) {
   1570     if (X.isGlobalReg()) {
   1571       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
   1572       // 'xrval'.
   1573       EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
   1574     } else {
   1575       // Perform compare-and-swap procedure.
   1576       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
   1577     }
   1578   }
   1579 }
   1580 
/// \brief Emit code for 'omp atomic update': atomically apply the binary
/// update expression \a UE to the variable \a X.
static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  // 'expr' is evaluated once here, outside the atomic operation.
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
  // The operands of UE are opaque placeholders; which one stands for 'x' (and
  // which for 'expr') depends on the side 'x' appears on in the source.
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  // Generator computing UE for a given value of 'x'; it binds both opaque
  // operands before re-emitting the update expression, and may be invoked
  // repeatedly from a compare-and-swap loop.
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
        return CGF.EmitAnyExpr(UE);
      };
  CGF.EmitOMPAtomicSimpleUpdateExpr(XLValue, ExprRValue, BOUE->getOpcode(),
                                    IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
   1617 
/// \brief Dispatch an 'omp atomic' construct to the emitter matching its
/// clause kind; a directive with no form clause defaults to 'update'.
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, const Expr *X, const Expr *V,
                              const Expr *E, const Expr *UE,
                              bool IsXLHSInRHSPart, SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  // OMPC_unknown means no form clause was written: the 'update' form.
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet.");
  // Sema rejects all remaining clause kinds on 'omp atomic'.
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_mergeable:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
   1662 
   1663 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
   1664   bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
   1665   OpenMPClauseKind Kind = OMPC_unknown;
   1666   for (auto *C : S.clauses()) {
   1667     // Find first clause (skip seq_cst clause, if it is first).
   1668     if (C->getClauseKind() != OMPC_seq_cst) {
   1669       Kind = C->getClauseKind();
   1670       break;
   1671     }
   1672   }
   1673 
   1674   const auto *CS =
   1675       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
   1676   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS))
   1677     enterFullExpression(EWC);
   1678 
   1679   LexicalScope Scope(*this, S.getSourceRange());
   1680   auto &&CodeGen = [&S, Kind, IsSeqCst](CodeGenFunction &CGF) {
   1681     EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
   1682                       S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart());
   1683   };
   1684   CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
   1685 }
   1686 
// TODO: implement codegen for 'omp target'; reaching this emitter before
// support lands is a front-end bug, hence the unreachable.
void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
  llvm_unreachable("CodeGen for 'omp target' is not supported yet.");
}
   1690 
// TODO: implement codegen for 'omp teams'; reaching this emitter before
// support lands is a front-end bug, hence the unreachable.
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
  llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
}
   1694 
   1695