//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
using namespace clang;
using namespace CodeGen;

//===----------------------------------------------------------------------===//
//                       OpenMP Directive Emission
//===----------------------------------------------------------------------===//
/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
///   CodeGen(true);
/// } else {
///   CodeGen(false);
/// }
static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
                            const std::function<void(bool)> &CodeGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    CodeGen(CondConstant);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  auto ThenBlock = CGF.createBasicBlock(/*name=*/"omp_if.then");
  auto ElseBlock = CGF.createBasicBlock(/*name=*/"omp_if.else");
  auto ContBlock = CGF.createBasicBlock(/*name=*/"omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  CodeGen(/*ThenBlock=*/true);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  {
    // There is no need to emit a line number for an unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ElseBlock);
  }
  CodeGen(/*ThenBlock=*/false);
  {
    // There is no need to emit a line number for an unconditional branch.
    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBranch(ContBlock);
  }
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPAggregateAssign(
    llvm::Value *DestAddr, llvm::Value *SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(llvm::Value *, llvm::Value *)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  auto SrcBegin = SrcAddr;
  auto DestBegin = DestAddr;
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin);
  // Cast from pointer to array type to pointer to single element.
  SrcBegin = Builder.CreatePointerBitCastOrAddrSpaceCast(SrcBegin,
                                                         DestBegin->getType());
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
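  // A sketch of the IR emitted below (block and value names are illustrative,
  // not guaranteed):
  //
  //   %isempty = icmp eq %dest.begin, %dest.end
  //   br i1 %isempty, label %omp.arraycpy.done, label %omp.arraycpy.body
  // omp.arraycpy.body:   ; PHIs advance the src/dest element pointers
  //   ...CopyGen(dest.cur, src.cur)...
  //   %done = icmp eq %dest.next, %dest.end
  //   br i1 %done, label %omp.arraycpy.done, label %omp.arraycpy.body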
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);
  auto SrcElementCurrent =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementCurrent->addIncoming(SrcBegin, EntryBB);
  auto DestElementCurrent = Builder.CreatePHI(DestBegin->getType(), 2,
                                              "omp.arraycpy.destElementPast");
  DestElementCurrent->addIncoming(DestBegin, EntryBB);

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementCurrent, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementCurrent, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementCurrent->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementCurrent->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(CodeGenFunction &CGF,
                                  QualType OriginalType, llvm::Value *DestAddr,
                                  llvm::Value *SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform a simple memcpy for simple copying.
      CGF.EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types, perform element-by-element
      // copying.
      CGF.EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [&CGF, Copy, SrcVD, DestVD](llvm::Value *DestElement,
                                      llvm::Value *SrcElement) {
            // We are working with a single array element, so we have to remap
            // the destination and source variables to the corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(CGF);
            Remap.addPrivate(DestVD, [DestElement]() -> llvm::Value * {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> llvm::Value * { return SrcElement; });
            (void)Remap.Privatize();
            CGF.EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(CGF);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> llvm::Value * { return SrcAddr; });
    Remap.addPrivate(DestVD,
                     [DestAddr]() -> llvm::Value * { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
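    // For class types, 'Copy' is typically the copy-assignment expression
    // built by Sema, e.g. 'dst = src' lowering to 'operator=(dst, src)'
    // (illustrative; the exact form is whatever Sema attached to the clause).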
    CGF.EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  auto FirstprivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_firstprivate;
  };
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
           FirstprivateFilter)> I(D.clauses(), FirstprivateFilter);
       I; ++I) {
    auto *C = cast<OMPFirstprivateClause>(*I);
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsFirstprivate.count(OrigVD) == 0) {
        EmittedAsFirstprivate.insert(OrigVD);
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        auto *OriginalAddr = EmitLValue(&DRE).getAddress();
        if (OrigVD->getType()->isArrayType()) {
          // Emit a VarDecl with copy init for arrays.
          // Get the address of the original variable captured in the current
          // captured region.
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
                auto Emission = EmitAutoVarAlloca(*VD);
                auto *Init = VD->getInit();
                if (!isa<CXXConstructExpr>(Init) ||
                    isTrivialInitializer(Init)) {
                  // Perform a simple memcpy.
                  EmitAggregateAssign(Emission.getAllocatedAddress(),
                                      OriginalAddr, (*IRef)->getType());
                } else {
                  EmitOMPAggregateAssign(
                      Emission.getAllocatedAddress(), OriginalAddr,
                      (*IRef)->getType(),
                      [this, VDInit, Init](llvm::Value *DestElement,
                                           llvm::Value *SrcElement) {
                        // Clean up any temporaries needed by the
                        // initialization.
                        RunCleanupsScope InitScope(*this);
                        // Emit initialization for a single element.
                        LocalDeclMap[VDInit] = SrcElement;
                        EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer=*/false);
                        LocalDeclMap.erase(VDInit);
                      });
                }
                EmitAutoVarCleanups(Emission);
                return Emission.getAllocatedAddress();
              });
        } else {
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
                // Emit a private VarDecl with copy init.
                // Remap the temp VDInit variable to the address of the
                // original variable (for proper handling of captured global
                // variables).
                LocalDeclMap[VDInit] = OriginalAddr;
                EmitDecl(*VD);
                LocalDeclMap.erase(VDInit);
                return GetAddrOfLocalVar(VD);
              });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef, ++InitsRef;
    }
  }
  return !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto PrivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_private;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
           I(D.clauses(), PrivateFilter);
       I; ++I) {
    auto *C = cast<OMPPrivateClause>(*I);
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      bool IsRegistered =
          PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
            // Emit a private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
      assert(IsRegistered && "private var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  auto CopyinFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_copyin;
  };
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(CopyinFilter)>
           I(D.clauses(), CopyinFilter);
       I; ++I) {
    auto *C = cast<OMPCopyinClause>(*I);
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable.
        auto *MasterAddr = VD->isStaticLocal()
                               ? CGM.getStaticLocalDeclAddress(VD)
                               : CGM.GetAddrOfGlobal(VD);
        // Get the address of the threadprivate variable.
        auto *PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If it
          // is, there is no need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr, CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr, CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(*this, (*IRef)->getType(), PrivateAddr, MasterAddr, DestVD,
                    SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit the copying procedure for non-master threads.
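    // Note: the synchronizing barrier after copyin is not emitted here; the
    // caller (e.g. EmitOMPParallelDirective) emits it when this returns true,
    // matching the __kmpc_barrier in the sketch at the top of this function.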
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  auto LastprivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_lastprivate;
  };
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
           LastprivateFilter)> I(D.clauses(), LastprivateFilter);
       I; ++I) {
    auto *C = cast<OMPLastprivateClause>(*I);
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for the future update at the
      // end of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD,
                                [this, OrigVD, IRef]() -> llvm::Value * {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in
        // codegen for the 'firstprivate' clause.
        if (!IInit)
          continue;
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
              // Emit a private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "lastprivate var already registered as private");
        HasAtLeastOneLastprivate = HasAtLeastOneLastprivate || IsRegistered;
      }
      ++IRef, ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) {
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  auto *ThenBB = createBasicBlock(".omp.lastprivate.then");
  auto *DoneBB = createBasicBlock(".omp.lastprivate.done");
  Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
  EmitBlock(ThenBB);
  {
    auto LastprivateFilter = [](const OMPClause *C) -> bool {
      return C->getClauseKind() == OMPC_lastprivate;
    };
    llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
    for (OMPExecutableDirective::filtered_clause_iterator<decltype(
             LastprivateFilter)> I(D.clauses(), LastprivateFilter);
         I; ++I) {
      auto *C = cast<OMPLastprivateClause>(*I);
      auto IRef = C->varlist_begin();
      auto ISrcRef = C->source_exprs().begin();
      auto IDestRef = C->destination_exprs().begin();
      for (auto *AssignOp : C->assignment_ops()) {
        auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
        if (AlreadyEmittedVars.insert(PrivateVD->getCanonicalDecl()).second) {
          auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
          auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
          // Get the address of the original variable.
          auto *OriginalAddr = GetAddrOfLocalVar(DestVD);
          // Get the address of the private variable.
          auto *PrivateAddr = GetAddrOfLocalVar(PrivateVD);
          EmitOMPCopy(*this, (*IRef)->getType(), OriginalAddr, PrivateAddr,
                      DestVD, SrcVD, AssignOp);
        }
        ++IRef;
        ++ISrcRef;
        ++IDestRef;
      }
    }
  }
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  auto ReductionFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_reduction;
  };
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
           ReductionFilter)> I(D.clauses(), ReductionFilter);
       I; ++I) {
    auto *C = cast<OMPReductionClause>(*I);
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (auto IRef : C->varlists()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> llvm::Value * {
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        IRef->getType(), VK_LValue, IRef->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
      // Emit the reduction copy.
      bool IsRegistered =
          PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> llvm::Value * {
            // Emit a private VarDecl with reduction init.
            EmitDecl(*PrivateVD);
            return GetAddrOfLocalVar(PrivateVD);
          });
      assert(IsRegistered && "private var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
      ++ILHS, ++IRHS;
    }
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D) {
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  auto ReductionFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_reduction;
  };
  bool HasAtLeastOneReduction = false;
  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
           ReductionFilter)> I(D.clauses(), ReductionFilter);
       I; ++I) {
    HasAtLeastOneReduction = true;
    auto *C = cast<OMPReductionClause>(*I);
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    // Emit a nowait reduction if the 'nowait' clause is present or if the
    // directive is a parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), LHSExprs, RHSExprs, ReductionOps,
        D.getSingleClause(OMPC_nowait) ||
            isOpenMPParallelDirective(D.getDirectiveKind()));
  }
}

/// \brief Emits code for OpenMP parallel directive in the parallel region.
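/// A sketch of the intended runtime interaction (assuming the libomp entry
/// points used by CGOpenMPRuntime; see that class for the authoritative
/// lowering):
///   __kmpc_push_num_threads(&loc, tid, n);  // only with 'num_threads(n)'
///   __kmpc_fork_call(&loc, argc, OutlinedFn, CapturedStruct);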
static void emitOMPParallelCall(CodeGenFunction &CGF,
                                const OMPExecutableDirective &S,
                                llvm::Value *OutlinedFn,
                                llvm::Value *CapturedStruct) {
  if (auto C = S.getSingleClause(/*K=*/OMPC_num_threads)) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedStruct);
}

static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
                                           const OMPExecutableDirective &S,
                                           const RegionCodeGenTy &CodeGen) {
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = CGF.GenerateCapturedStmtArgument(*CS);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), CodeGen);
  if (auto C = S.getSingleClause(/*K=*/OMPC_if)) {
    auto Cond = cast<OMPIfClause>(C)->getCondition();
    EmitOMPIfClause(CGF, Cond, [&](bool ThenBlock) {
      if (ThenBlock)
        emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
      else
        CGF.CGM.getOpenMPRuntime().emitSerialCall(CGF, S.getLocStart(),
                                                  OutlinedFn, CapturedStruct);
    });
  } else
    emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  // Emit the parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins || Firstprivates) {
      // Emit an implicit barrier to synchronize threads and avoid data races
      // on the initialization of firstprivate variables or on propagation of
      // the master thread's values of threadprivate variables to the local
      // instances of those variables in all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                                 OMPD_unknown);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S);
    // Emit an implicit barrier at the end of the 'parallel' directive.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                               OMPD_unknown);
  };
  emitCommonOMPParallelDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
                                      bool SeparateIter) {
  RunCleanupsScope BodyScope(*this);
  // Update the counter values for the current iteration.
  for (auto I : S.updates()) {
    EmitIgnoredExpr(I);
  }
  // Update the linear variables.
  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    for (auto U : C->updates()) {
      EmitIgnoredExpr(U);
    }
  }

  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue));
  // Emit the loop body.
  EmitStmt(S.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (SeparateIter) {
    // TODO: Update lastprivates if the SeparateIter flag is true.
    // This will be implemented in a follow-up OMPLastprivateClause patch, but
    // the result should still be correct without it, as we do not make these
    // variables private yet.
  }
}

void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
  auto Cnt = getPGORegionCounter(&S);

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  auto LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit the condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, Cnt.getCount());
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  Cnt.beginRegion(Builder);

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}

void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) {
  auto IC = S.counters().begin();
  for (auto F : S.finals()) {
    if (LocalDeclMap.lookup(cast<DeclRefExpr>((*IC))->getDecl())) {
      EmitIgnoredExpr(F);
    }
    ++IC;
  }
  // Emit the final values of the linear variables.
  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    for (auto F : C->finals()) {
      EmitIgnoredExpr(F);
    }
  }
}

static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM,
                                 const OMPAlignedClause &Clause) {
  unsigned ClauseAlignment = 0;
  if (auto AlignmentExpr = Clause.getAlignment()) {
    auto AlignmentCI =
        cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
    ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
  }
  for (auto E : Clause.varlists()) {
    unsigned Alignment = ClauseAlignment;
    if (Alignment == 0) {
      // OpenMP [2.8.1, Description]
      // If no optional parameter is specified, implementation-defined default
      // alignments for SIMD instructions on the target platforms are assumed.
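      // E.g. a target might report its widest natural vector alignment here
      // (illustrative); zero means "no known default" and, per the check
      // below, no alignment assumption is emitted.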
      Alignment = CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment(
          E->getType());
    }
    assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
           "alignment is not power of 2");
    if (Alignment != 0) {
      llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
      CGF.EmitAlignmentAssumption(PtrValue, Alignment);
    }
  }
}

static void EmitPrivateLoopCounters(CodeGenFunction &CGF,
                                    CodeGenFunction::OMPPrivateScope &LoopScope,
                                    ArrayRef<Expr *> Counters) {
  for (auto *E : Counters) {
    auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    bool IsRegistered = LoopScope.addPrivate(VD, [&]() -> llvm::Value * {
      // Emit the var without initialization.
      auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
      CGF.EmitAutoVarCleanups(VarEmission);
      return VarEmission.getAllocatedAddress();
    });
    assert(IsRegistered && "counter already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;
  }
}

static void
EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
                      CodeGenFunction::OMPPrivateScope &PrivateScope) {
  for (auto Clause : OMPExecutableDirective::linear_filter(D.clauses())) {
    for (auto *E : Clause->varlists()) {
      auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> llvm::Value * {
        // Emit the var without initialization.
        auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
        CGF.EmitAutoVarCleanups(VarEmission);
        return VarEmission.getAllocatedAddress();
      });
      assert(IsRegistered && "linear var already registered as private");
      // Silence the warning about unused variable.
      (void)IsRegistered;
    }
  }
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    // Pragma 'simd' code depends on presence of 'lastprivate'.
    // If present, we have to separate the last iteration of the loop:
    //
    // if (LastIteration != 0) {
    //   for (IV in 0..LastIteration-1) BODY;
    //   BODY with updates of lastprivate vars;
    //   <Final counter/linear vars updates>;
    // }
    //
    // otherwise (when there's no lastprivate):
    //
    // for (IV in 0..LastIteration) BODY;
    // <Final counter/linear vars updates>;
    //

    // Walk clauses and process safelen/lastprivate.
    bool SeparateIter = false;
    CGF.LoopStack.setParallel();
    CGF.LoopStack.setVectorizerEnable(true);
    for (auto C : S.clauses()) {
      switch (C->getClauseKind()) {
      case OMPC_safelen: {
        RValue Len = CGF.EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
                                     AggValueSlot::ignored(), true);
        llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
        CGF.LoopStack.setVectorizerWidth(Val->getZExtValue());
        // In presence of finite 'safelen', it may be unsafe to mark all
        // the memory instructions parallel, because loop-carried
        // dependences of 'safelen' iterations are possible.
        CGF.LoopStack.setParallel(false);
        break;
      }
      case OMPC_aligned:
        EmitOMPAlignedClause(CGF, CGF.CGM, cast<OMPAlignedClause>(*C));
        break;
      case OMPC_lastprivate:
        SeparateIter = true;
        break;
      default:
        // Not handled yet.
        ;
      }
    }

    // Emit inits for the linear variables.
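    // For example, for '#pragma omp simd linear(x:2)' Sema provides a private
    // copy of 'x' with an init from the original variable; the step (2) is
    // applied by the update expressions emitted in EmitOMPLoopBody, and the
    // final value is written back in EmitOMPSimdFinal (a sketch of the
    // intended flow, not an exact transcript).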
    for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
      for (auto Init : C->inits()) {
        auto *D = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
        CGF.EmitVarDecl(*D);
      }
    }

    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iterations count
    // on each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit the calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
      if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
        if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
          CGF.EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
          // Emit the calculation of the linear step.
          CGF.EmitIgnoredExpr(CS);
        }
    }

    if (SeparateIter) {
      // Emit: if (LastIteration > 0) - begin.
      RegionCounter Cnt = CGF.getPGORegionCounter(&S);
      auto ThenBlock = CGF.createBasicBlock("simd.if.then");
      auto ContBlock = CGF.createBasicBlock("simd.if.end");
      CGF.EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock,
                               Cnt.getCount());
      CGF.EmitBlock(ThenBlock);
      Cnt.beginRegion(CGF.Builder);
      // Emit 'then' code.
      {
        OMPPrivateScope LoopScope(CGF);
        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
        EmitPrivateLinearVars(CGF, S, LoopScope);
        CGF.EmitOMPPrivateClause(S, LoopScope);
        (void)LoopScope.Privatize();
        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                             S.getCond(/*SeparateIter=*/true), S.getInc(),
                             [&S](CodeGenFunction &CGF) {
                               CGF.EmitOMPLoopBody(S);
                               CGF.EmitStopPoint(&S);
                             });
        CGF.EmitOMPLoopBody(S, /*SeparateIter=*/true);
      }
      CGF.EmitOMPSimdFinal(S);
      // Emit: if (LastIteration != 0) - end.
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    } else {
      {
        OMPPrivateScope LoopScope(CGF);
        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
        EmitPrivateLinearVars(CGF, S, LoopScope);
        CGF.EmitOMPPrivateClause(S, LoopScope);
        (void)LoopScope.Privatize();
        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                             S.getCond(/*SeparateIter=*/false), S.getInc(),
                             [&S](CodeGenFunction &CGF) {
                               CGF.EmitOMPLoopBody(S);
                               CGF.EmitStopPoint(&S);
                             });
      }
      CGF.EmitOMPSimdFinal(S);
    }
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
}

void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                          const OMPLoopDirective &S,
                                          OMPPrivateScope &LoopScope,
                                          llvm::Value *LB, llvm::Value *UB,
                                          llvm::Value *ST, llvm::Value *IL,
                                          llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
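  // Note that chunked 'static' schedules also take this outer-loop path; only
  // the static non-chunked case is handled without it (see the assert below
  // and the static branch in EmitOMPWorksharingLoop).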
  const bool Dynamic = RT.isDynamic(ScheduleKind);

  assert(!RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr) &&
         "static non-chunked schedule does not need outer loop");

  // Emit the outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from
  // the run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the threads
  // in the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  RT.emitForInit(
      *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
      (Dynamic ? EmitAnyExpr(S.getLastIteration()).getScalarVal() : UB), ST,
      Chunk);

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
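  // (In libomp terms, emitForInit above and emitForNext below roughly
  // correspond to __kmpc_dispatch_init/__kmpc_dispatch_next for dynamic
  // schedules and to __kmpc_for_static_init for static ones; a sketch of the
  // mapping, see CGOpenMPRuntime for the authoritative lowering.)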
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  if (!Dynamic) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond(/*SeparateIter=*/false));
  } else {
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
                                 IL, LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in the case of a static schedule, we have already
  // calculated the new LB for the loop condition and emitted it above).
  if (Dynamic)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                   S.getCond(/*SeparateIter=*/false), S.getInc(),
                   [&S](CodeGenFunction &CGF) {
                     CGF.EmitOMPLoopBody(S);
                     CGF.EmitStopPoint(&S);
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!Dynamic) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  // FIXME: Also call fini for ordered loops with dynamic scheduling.
  if (!Dynamic)
    RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
}

/// \brief Emit a helper variable and return the corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit the calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check the pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
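    // For example, for 'for (int i = 0; i < n; ++i)' with n <= 0 at run time,
    // control jumps straight to omp.precond.end and no runtime calls are made
    // (illustrative; the precondition expression itself comes from Sema).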
    RegionCounter Cnt = getPGORegionCounter(&S);
    auto ThenBlock = createBasicBlock("omp.precond.then");
    auto ContBlock = createBasicBlock("omp.precond.end");
    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
    EmitBlock(ThenBlock);
    Cnt.beginRegion(Builder);
    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit an implicit barrier to synchronize threads and avoid data races
        // on the initialization of firstprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                               OMPD_unknown);
      }
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      auto ScheduleKind = OMPC_SCHEDULE_unknown;
      llvm::Value *Chunk = nullptr;
      if (auto C = cast_or_null<OMPScheduleClause>(
              S.getSingleClause(OMPC_schedule))) {
        ScheduleKind = C->getScheduleKind();
        if (auto Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      if (RT.isStaticNonchunked(ScheduleKind,
                                /*Chunked=*/Chunk != nullptr)) {
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided into
        // chunks that are approximately equal in size, and at most one chunk
        // is distributed to each thread. Note that the size of the chunks is
        // unspecified in this case.
        RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                       IL.getAddress(), LB.getAddress(), UB.getAddress(),
                       ST.getAddress());
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                         S.getCond(/*SeparateIter=*/false), S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S);
                           CGF.EmitStopPoint(&S);
                         });
        // Tell the runtime we are done.
        RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // the runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
                            UB.getAddress(), ST.getAddress(), IL.getAddress(),
                            Chunk);
      }
      // Emit the final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }
    // We're now done with the loop, so jump to the continuation block.
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  }
  return HasLastprivateClause;
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause(OMPC_nowait) || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
}

static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto LVal = CGF.MakeNaturalAlignAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitScalarInit(Init, LVal);
  return LVal;
}

static OpenMPDirectiveKind emitSections(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  if (CS && CS->size() > 1) {
    auto &&CodeGen = [&S, CS](CodeGenFunction &CGF) {
      auto &C = CGF.CGM.getContext();
      auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
      // Emit helper vars inits.
      LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                    CGF.Builder.getInt32(0));
      auto *GlobalUBVal = CGF.Builder.getInt32(CS->size() - 1);
      LValue UB =
          createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
      LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                    CGF.Builder.getInt32(1));
      LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                    CGF.Builder.getInt32(0));
      // Loop counter.
      LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
      OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
      OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
      // Generate the condition for the loop.
      BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                          OK_Ordinary, S.getLocStart(),
                          /*fpContractable=*/false);
      // Increment for the loop counter.
      UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue,
                        OK_Ordinary, S.getLocStart());
      auto BodyGen = [CS, &S, &IV](CodeGenFunction &CGF) {
        // Iterate through all sections and emit a switch construct:
        // switch (IV) {
        // case 0:
        //   <SectionStmt[0]>;
        //   break;
        // ...
        // case <NumSection> - 1:
        //   <SectionStmt[<NumSection> - 1]>;
        //   break;
        // }
        // .omp.sections.exit:
        auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
        auto *SwitchStmt = CGF.Builder.CreateSwitch(
            CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
            CS->size());
        unsigned CaseNumber = 0;
        for (auto C = CS->children(); C; ++C, ++CaseNumber) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(*C);
          CGF.EmitBranch(ExitBB);
        }
        CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
      };
      // Emit a static non-chunked loop.
      CGF.CGM.getOpenMPRuntime().emitForInit(
          CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
          /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(),
          ST.getAddress());
      // UB = min(UB, GlobalUB);
      auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
      auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
          CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
      CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
      // IV = LB;
      CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
      // while (idx <= UB) { BODY; ++idx; }
      CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen);
      // Tell the runtime we are done.
      CGF.CGM.getOpenMPRuntime().emitForFinish(CGF, S.getLocStart(),
                                               OMPC_SCHEDULE_static);
    };

    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, CodeGen);
    return OMPD_sections;
  }
  // If only one section is found, there is no need to generate a loop; emit it
  // as a single region.
  auto &&CodeGen = [Stmt](CodeGenFunction &CGF) {
    CGF.EmitStmt(Stmt);
    CGF.EnsureInsertPoint();
  };
  CGF.CGM.getOpenMPRuntime().emitSingleRegion(CGF, CodeGen, S.getLocStart(),
                                              llvm::None, llvm::None,
                                              llvm::None, llvm::None);
  return OMPD_single;
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  OpenMPDirectiveKind EmittedAs = emitSections(*this, S);
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause(OMPC_nowait)) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs);
  }
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this 'single'
  // construct.
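  // (The broadcast of the copyprivate values to the other threads is done by
  // the runtime, typically via __kmpc_copyprivate in libomp, using the helper
  // expressions collected below; a sketch of the mechanism, see
  // emitSingleRegion.)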
  auto CopyprivateFilter = [](const OMPClause *C) -> bool {
    return C->getClauseKind() == OMPC_copyprivate;
  };
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination> = <source> expressions).
  typedef OMPExecutableDirective::filtered_clause_iterator<decltype(
      CopyprivateFilter)> CopyprivateIter;
  for (CopyprivateIter I(S.clauses(), CopyprivateFilter); I; ++I) {
    auto *C = cast<OMPCopyprivateClause>(*I);
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  LexicalScope Scope(*this, S.getSourceRange());
  // Emit code for the 'single' region along with the 'copyprivate' clauses.
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                          CopyprivateVars, DestExprs, SrcExprs,
                                          AssignmentOps);
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause(OMPC_nowait)) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_single);
  }
}

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EnsureInsertPoint();
  };
  CGM.getOpenMPRuntime().emitCriticalRegion(
      *this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit the directive as a combined directive that consists of two implicit
  // directives: 'parallel' with the 'for' directive.
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    CGF.EmitOMPWorksharingLoop(S);
    // Emit an implicit barrier at the end of the parallel region. Since this
    // barrier is also at the end of the 'for' directive, emit it as the
    // implicit barrier for this 'for' directive.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                               OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp parallel for simd' is not supported yet.");
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit the directive as a combined directive that consists of two implicit
  // directives: 'parallel' with the 'sections' directive.
  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
    (void)emitSections(CGF, S);
    // Emit an implicit barrier at the end of the parallel region.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                               OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit the outlined function for the task construct.
  LexicalScope Scope(*this, S.getSourceRange());
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied tasks).
  auto &&CodeGen = [PartId, &S](CodeGenFunction &CGF) {
    if (*PartId) {
      // TODO: emit code for untied tasks.
    }
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  auto OutlinedFn =
      CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, CodeGen);
  // Check if we should emit a tied or untied task.
  bool Tied = !S.getSingleClause(OMPC_untied);
  // Check if the task is final.
  llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
  if (auto *Clause = S.getSingleClause(OMPC_final)) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = cast<OMPFinalClause>(Clause)->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Final.setInt(CondConstant);
    else
      Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Final.setInt(/*IntVal=*/false);
  }
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), Tied, Final,
                                      OutlinedFn, SharedsTy, CapturedStruct);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) {
  llvm_unreachable("CodeGen for 'omp taskwait' is not supported yet.");
}

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (auto C = S.getSingleClause(/*K=*/OMPC_flush)) {
      auto FlushClause = cast<OMPFlushClause>(C);
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &) {
  llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
}

static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar()
             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType)
             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                                 DestType);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal =
        CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType);
  }
  return ComplexVal;
}

static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = XLValue.isGlobalReg()

static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(XLValue, Loc,
                                        IsSeqCst ? llvm::SequentiallyConsistent
                                                 : llvm::Monotonic,
                                        XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  switch (CGF.getEvaluationKind(V->getType())) {
  case TEK_Scalar:
    CGF.EmitStoreOfScalar(
        convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue);
    break;
  case TEK_Complex:
    CGF.EmitStoreOfComplex(
        convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  if (XLValue.isGlobalReg())
    CGF.EmitStoreThroughGlobalRegLValue(ExprRValue, XLValue);
  else
    CGF.EmitAtomicStore(ExprRValue, XLValue,
                        IsSeqCst ? llvm::SequentiallyConsistent
                                 : llvm::Monotonic,
                        XLValue.isVolatile(), /*IsInit=*/false);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
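
// Fast path for 'atomic update': when the update is an integer operation with
// a matching atomicrmw instruction, the whole statement maps to a single
// instruction. A hedged sketch, assuming 'x' is an 'int' in memory:
//   #pragma omp atomic update
//   x += expr;
// becomes, roughly,
//   %old = atomicrmw add i32* %x, i32 %expr monotonic
// Note the operand-order subtleties encoded below: 'x = expr - x' has no
// atomicrmw form (the helper returns false and the caller falls back to
// compare-and-swap), and for min/max the chosen operation flips depending on
// which side of the comparison 'x' appears on.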

static bool emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, RValue Update,
                             BinaryOperatorKind BO, llvm::AtomicOrdering AO,
                             bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomics are supported for the given
  // type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress()->getType()->getPointerElementType())) ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return false;

  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return false;
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return false;
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Assign:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress()->getType()->getPointerElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
  return true;
}

void CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
  if (!emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart)) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
}
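
// When no atomicrmw form applies, the EmitAtomicUpdate call above falls back
// to a compare-and-swap loop. A conceptual sketch (not the literal emitted
// IR), assuming an 'int' stored in memory:
//   %old = load atomic i32* %x monotonic
// cont:
//   %new = <CommonGen(%old)>            ; recompute UE with %old bound to 'x'
//   %res = cmpxchg i32* %x, i32 %old, i32 %new monotonic monotonic
//   ; on failure, loop back to cont with the freshly loaded value
// EmitOMPAtomicUpdateExpr below builds CommonGen by binding the
// OpaqueValueExpr placeholders inside 'UE': the 'expr' operand is bound to
// the already-evaluated ExprRValue, and the 'x' operand to the value the
// atomic machinery loads, so re-emitting 'UE' computes the new value in
// registers without touching 'x' in memory.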

static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  CGF.EmitOMPAtomicSimpleUpdateExpr(XLValue, ExprRValue, BOUE->getOpcode(),
                                    IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, const Expr *X, const Expr *V,
                              const Expr *E, const Expr *UE,
                              bool IsXLHSInRHSPart, SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    llvm_unreachable("CodeGen for 'omp atomic capture' is not supported yet.");
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_safelen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_mergeable:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find the first clause (skip the seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const auto *CS =
      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS))
    enterFullExpression(EWC);

  LexicalScope Scope(*this, S.getSourceRange());
  auto &&CodeGen = [&S, Kind, IsSeqCst](CodeGenFunction &CGF) {
    EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
                      S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart());
  };
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
  llvm_unreachable("CodeGen for 'omp target' is not supported yet.");
}

void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
  llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
}