1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for OpenMP runtime code generation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGOpenMPRuntime.h" 15 #include "CodeGenFunction.h" 16 #include "CGCleanup.h" 17 #include "clang/AST/Decl.h" 18 #include "clang/AST/StmtOpenMP.h" 19 #include "llvm/ADT/ArrayRef.h" 20 #include "llvm/IR/CallSite.h" 21 #include "llvm/IR/DerivedTypes.h" 22 #include "llvm/IR/GlobalValue.h" 23 #include "llvm/IR/Value.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include <cassert> 26 27 using namespace clang; 28 using namespace CodeGen; 29 30 namespace { 31 /// \brief Base class for handling code generation inside OpenMP regions. 32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 33 public: 34 /// \brief Kinds of OpenMP regions used in codegen. 35 enum CGOpenMPRegionKind { 36 /// \brief Region with outlined function for standalone 'parallel' 37 /// directive. 38 ParallelOutlinedRegion, 39 /// \brief Region with outlined function for standalone 'task' directive. 40 TaskOutlinedRegion, 41 /// \brief Region for constructs that do not require function outlining, 42 /// like 'for', 'sections', 'atomic' etc. directives. 43 InlinedRegion, 44 /// \brief Region with outlined function for standalone 'target' directive. 
45 TargetRegion, 46 }; 47 48 CGOpenMPRegionInfo(const CapturedStmt &CS, 49 const CGOpenMPRegionKind RegionKind, 50 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 51 bool HasCancel) 52 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 53 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 54 55 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 56 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 57 bool HasCancel) 58 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 59 Kind(Kind), HasCancel(HasCancel) {} 60 61 /// \brief Get a variable or parameter for storing global thread id 62 /// inside OpenMP construct. 63 virtual const VarDecl *getThreadIDVariable() const = 0; 64 65 /// \brief Emit the captured statement body. 66 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 67 68 /// \brief Get an LValue for the current ThreadID variable. 69 /// \return LValue for thread id variable. This LValue always has type int32*. 70 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 71 72 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 73 74 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 75 76 bool hasCancel() const { return HasCancel; } 77 78 static bool classof(const CGCapturedStmtInfo *Info) { 79 return Info->getKind() == CR_OpenMP; 80 } 81 82 protected: 83 CGOpenMPRegionKind RegionKind; 84 const RegionCodeGenTy &CodeGen; 85 OpenMPDirectiveKind Kind; 86 bool HasCancel; 87 }; 88 89 /// \brief API for captured statement code generation in OpenMP constructs. 
90 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo { 91 public: 92 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 93 const RegionCodeGenTy &CodeGen, 94 OpenMPDirectiveKind Kind, bool HasCancel) 95 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 96 HasCancel), 97 ThreadIDVar(ThreadIDVar) { 98 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 99 } 100 /// \brief Get a variable or parameter for storing global thread id 101 /// inside OpenMP construct. 102 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 103 104 /// \brief Get the name of the capture helper. 105 StringRef getHelperName() const override { return ".omp_outlined."; } 106 107 static bool classof(const CGCapturedStmtInfo *Info) { 108 return CGOpenMPRegionInfo::classof(Info) && 109 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 110 ParallelOutlinedRegion; 111 } 112 113 private: 114 /// \brief A variable or parameter storing global thread id for OpenMP 115 /// constructs. 116 const VarDecl *ThreadIDVar; 117 }; 118 119 /// \brief API for captured statement code generation in OpenMP constructs. 120 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo { 121 public: 122 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 123 const VarDecl *ThreadIDVar, 124 const RegionCodeGenTy &CodeGen, 125 OpenMPDirectiveKind Kind, bool HasCancel) 126 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 127 ThreadIDVar(ThreadIDVar) { 128 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 129 } 130 /// \brief Get a variable or parameter for storing global thread id 131 /// inside OpenMP construct. 132 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 133 134 /// \brief Get an LValue for the current ThreadID variable. 135 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 136 137 /// \brief Get the name of the capture helper. 
138 StringRef getHelperName() const override { return ".omp_outlined."; } 139 140 static bool classof(const CGCapturedStmtInfo *Info) { 141 return CGOpenMPRegionInfo::classof(Info) && 142 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 143 TaskOutlinedRegion; 144 } 145 146 private: 147 /// \brief A variable or parameter storing global thread id for OpenMP 148 /// constructs. 149 const VarDecl *ThreadIDVar; 150 }; 151 152 /// \brief API for inlined captured statement code generation in OpenMP 153 /// constructs. 154 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 155 public: 156 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 157 const RegionCodeGenTy &CodeGen, 158 OpenMPDirectiveKind Kind, bool HasCancel) 159 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 160 OldCSI(OldCSI), 161 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 162 // \brief Retrieve the value of the context parameter. 163 llvm::Value *getContextValue() const override { 164 if (OuterRegionInfo) 165 return OuterRegionInfo->getContextValue(); 166 llvm_unreachable("No context value for inlined OpenMP region"); 167 } 168 void setContextValue(llvm::Value *V) override { 169 if (OuterRegionInfo) { 170 OuterRegionInfo->setContextValue(V); 171 return; 172 } 173 llvm_unreachable("No context value for inlined OpenMP region"); 174 } 175 /// \brief Lookup the captured field decl for a variable. 176 const FieldDecl *lookup(const VarDecl *VD) const override { 177 if (OuterRegionInfo) 178 return OuterRegionInfo->lookup(VD); 179 // If there is no outer outlined region,no need to lookup in a list of 180 // captured variables, we can use the original one. 181 return nullptr; 182 } 183 FieldDecl *getThisFieldDecl() const override { 184 if (OuterRegionInfo) 185 return OuterRegionInfo->getThisFieldDecl(); 186 return nullptr; 187 } 188 /// \brief Get a variable or parameter for storing global thread id 189 /// inside OpenMP construct. 
190 const VarDecl *getThreadIDVariable() const override { 191 if (OuterRegionInfo) 192 return OuterRegionInfo->getThreadIDVariable(); 193 return nullptr; 194 } 195 196 /// \brief Get the name of the capture helper. 197 StringRef getHelperName() const override { 198 if (auto *OuterRegionInfo = getOldCSI()) 199 return OuterRegionInfo->getHelperName(); 200 llvm_unreachable("No helper name for inlined OpenMP construct"); 201 } 202 203 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 204 205 static bool classof(const CGCapturedStmtInfo *Info) { 206 return CGOpenMPRegionInfo::classof(Info) && 207 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 208 } 209 210 private: 211 /// \brief CodeGen info about outer OpenMP region. 212 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 213 CGOpenMPRegionInfo *OuterRegionInfo; 214 }; 215 216 /// \brief API for captured statement code generation in OpenMP target 217 /// constructs. For this captures, implicit parameters are used instead of the 218 /// captured fields. 219 class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo { 220 public: 221 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 222 const RegionCodeGenTy &CodeGen) 223 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 224 /*HasCancel = */ false) {} 225 226 /// \brief This is unused for target regions because each starts executing 227 /// with a single thread. 228 const VarDecl *getThreadIDVariable() const override { return nullptr; } 229 230 /// \brief Get the name of the capture helper. 231 StringRef getHelperName() const override { return ".omp_offloading."; } 232 233 static bool classof(const CGCapturedStmtInfo *Info) { 234 return CGOpenMPRegionInfo::classof(Info) && 235 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 236 } 237 }; 238 239 /// \brief RAII for emitting code of OpenMP constructs. 
240 class InlinedOpenMPRegionRAII { 241 CodeGenFunction &CGF; 242 243 public: 244 /// \brief Constructs region for combined constructs. 245 /// \param CodeGen Code generation sequence for combined directives. Includes 246 /// a list of functions used for code generation of implicitly inlined 247 /// regions. 248 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 249 OpenMPDirectiveKind Kind, bool HasCancel) 250 : CGF(CGF) { 251 // Start emission for the construct. 252 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 253 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 254 } 255 ~InlinedOpenMPRegionRAII() { 256 // Restore original CapturedStmtInfo only if we're done with code emission. 257 auto *OldCSI = 258 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 259 delete CGF.CapturedStmtInfo; 260 CGF.CapturedStmtInfo = OldCSI; 261 } 262 }; 263 264 } // anonymous namespace 265 266 static LValue emitLoadOfPointerLValue(CodeGenFunction &CGF, Address PtrAddr, 267 QualType Ty) { 268 AlignmentSource Source; 269 CharUnits Align = CGF.getNaturalPointeeTypeAlignment(Ty, &Source); 270 return CGF.MakeAddrLValue(Address(CGF.Builder.CreateLoad(PtrAddr), Align), 271 Ty->getPointeeType(), Source); 272 } 273 274 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 275 return emitLoadOfPointerLValue(CGF, 276 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 277 getThreadIDVariable()->getType()); 278 } 279 280 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 281 if (!CGF.HaveInsertPoint()) 282 return; 283 // 1.2.2 OpenMP Language Terminology 284 // Structured block - An executable statement with a single entry at the 285 // top and a single exit at the bottom. 286 // The point of exit cannot be a branch out of the structured block. 287 // longjmp() and throw() must not violate the entry/exit criteria. 
288 CGF.EHStack.pushTerminate(); 289 { 290 CodeGenFunction::RunCleanupsScope Scope(CGF); 291 CodeGen(CGF); 292 } 293 CGF.EHStack.popTerminate(); 294 } 295 296 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 297 CodeGenFunction &CGF) { 298 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 299 getThreadIDVariable()->getType(), 300 AlignmentSource::Decl); 301 } 302 303 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) 304 : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) { 305 IdentTy = llvm::StructType::create( 306 "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, 307 CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, 308 CGM.Int8PtrTy /* psource */, nullptr); 309 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 310 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 311 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 312 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 313 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 314 } 315 316 void CGOpenMPRuntime::clear() { 317 InternalVars.clear(); 318 } 319 320 // Layout information for ident_t. 321 static CharUnits getIdentAlign(CodeGenModule &CGM) { 322 return CGM.getPointerAlign(); 323 } 324 static CharUnits getIdentSize(CodeGenModule &CGM) { 325 assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign())); 326 return CharUnits::fromQuantity(16) + CGM.getPointerSize(); 327 } 328 static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) { 329 // All the fields except the last are i32, so this works beautifully. 
330 return unsigned(Field) * CharUnits::fromQuantity(4); 331 } 332 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, 333 CGOpenMPRuntime::IdentFieldIndex Field, 334 const llvm::Twine &Name = "") { 335 auto Offset = getOffsetOfIdentField(Field); 336 return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name); 337 } 338 339 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( 340 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 341 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 342 assert(ThreadIDVar->getType()->isPointerType() && 343 "thread id variable must be of type kmp_int32 *"); 344 const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 345 CodeGenFunction CGF(CGM, true); 346 bool HasCancel = false; 347 if (auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 348 HasCancel = OPD->hasCancel(); 349 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 350 HasCancel = OPSD->hasCancel(); 351 else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 352 HasCancel = OPFD->hasCancel(); 353 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 354 HasCancel); 355 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 356 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 357 } 358 359 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( 360 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 361 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 362 assert(!ThreadIDVar->getType()->isPointerType() && 363 "thread id variable must be of type kmp_int32 for tasks"); 364 auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 365 CodeGenFunction CGF(CGM, true); 366 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 367 InnermostKind, 368 cast<OMPTaskDirective>(D).hasCancel()); 369 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 370 return CGF.GenerateCapturedStmtFunction(*CS); 371 } 372 373 Address 
CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { 374 CharUnits Align = getIdentAlign(CGM); 375 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); 376 if (!Entry) { 377 if (!DefaultOpenMPPSource) { 378 // Initialize default location for psource field of ident_t structure of 379 // all ident_t objects. Format is ";file;function;line;column;;". 380 // Taken from 381 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c 382 DefaultOpenMPPSource = 383 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 384 DefaultOpenMPPSource = 385 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 386 } 387 auto DefaultOpenMPLocation = new llvm::GlobalVariable( 388 CGM.getModule(), IdentTy, /*isConstant*/ true, 389 llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr); 390 DefaultOpenMPLocation->setUnnamedAddr(true); 391 DefaultOpenMPLocation->setAlignment(Align.getQuantity()); 392 393 llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true); 394 llvm::Constant *Values[] = {Zero, 395 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 396 Zero, Zero, DefaultOpenMPPSource}; 397 llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values); 398 DefaultOpenMPLocation->setInitializer(Init); 399 OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation; 400 } 401 return Address(Entry, Align); 402 } 403 404 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 405 SourceLocation Loc, 406 OpenMPLocationFlags Flags) { 407 // If no debug info is generated - return global default location. 
408 if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo || 409 Loc.isInvalid()) 410 return getOrCreateDefaultLocation(Flags).getPointer(); 411 412 assert(CGF.CurFn && "No function in current CodeGenFunction."); 413 414 Address LocValue = Address::invalid(); 415 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 416 if (I != OpenMPLocThreadIDMap.end()) 417 LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM)); 418 419 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 420 // GetOpenMPThreadID was called before this routine. 421 if (!LocValue.isValid()) { 422 // Generate "ident_t .kmpc_loc.addr;" 423 Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM), 424 ".kmpc_loc.addr"); 425 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 426 Elem.second.DebugLoc = AI.getPointer(); 427 LocValue = AI; 428 429 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 430 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 431 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 432 CGM.getSize(getIdentSize(CGF.CGM))); 433 } 434 435 // char **psource = &.kmpc_loc_<flags>.addr.psource; 436 Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource); 437 438 auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 439 if (OMPDebugLoc == nullptr) { 440 SmallString<128> Buffer2; 441 llvm::raw_svector_ostream OS2(Buffer2); 442 // Build debug location 443 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 444 OS2 << ";" << PLoc.getFilename() << ";"; 445 if (const FunctionDecl *FD = 446 dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) { 447 OS2 << FD->getQualifiedNameAsString(); 448 } 449 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 450 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 451 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 452 } 453 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 454 
CGF.Builder.CreateStore(OMPDebugLoc, PSource); 455 456 // Our callers always pass this to a runtime function, so for 457 // convenience, go ahead and return a naked pointer. 458 return LocValue.getPointer(); 459 } 460 461 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 462 SourceLocation Loc) { 463 assert(CGF.CurFn && "No function in current CodeGenFunction."); 464 465 llvm::Value *ThreadID = nullptr; 466 // Check whether we've already cached a load of the thread id in this 467 // function. 468 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 469 if (I != OpenMPLocThreadIDMap.end()) { 470 ThreadID = I->second.ThreadID; 471 if (ThreadID != nullptr) 472 return ThreadID; 473 } 474 if (auto OMPRegionInfo = 475 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 476 if (OMPRegionInfo->getThreadIDVariable()) { 477 // Check if this an outlined function with thread id passed as argument. 478 auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 479 ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); 480 // If value loaded in entry block, cache it and use it everywhere in 481 // function. 482 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 483 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 484 Elem.second.ThreadID = ThreadID; 485 } 486 return ThreadID; 487 } 488 } 489 490 // This is not an outlined function region - need to call __kmpc_int32 491 // kmpc_global_thread_num(ident_t *loc). 492 // Generate thread id value and cache this value for use across the 493 // function. 
494 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 495 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 496 ThreadID = 497 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 498 emitUpdateLocation(CGF, Loc)); 499 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 500 Elem.second.ThreadID = ThreadID; 501 return ThreadID; 502 } 503 504 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 505 assert(CGF.CurFn && "No function in current CodeGenFunction."); 506 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 507 OpenMPLocThreadIDMap.erase(CGF.CurFn); 508 } 509 510 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 511 return llvm::PointerType::getUnqual(IdentTy); 512 } 513 514 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 515 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 516 } 517 518 llvm::Constant * 519 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { 520 llvm::Constant *RTLFn = nullptr; 521 switch (Function) { 522 case OMPRTL__kmpc_fork_call: { 523 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 524 // microtask, ...); 525 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 526 getKmpc_MicroPointerTy()}; 527 llvm::FunctionType *FnTy = 528 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 529 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 530 break; 531 } 532 case OMPRTL__kmpc_global_thread_num: { 533 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 534 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 535 llvm::FunctionType *FnTy = 536 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 537 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 538 break; 539 } 540 case OMPRTL__kmpc_threadprivate_cached: { 541 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 542 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 543 llvm::Type *TypeParams[] = 
{getIdentTyPointerTy(), CGM.Int32Ty, 544 CGM.VoidPtrTy, CGM.SizeTy, 545 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 546 llvm::FunctionType *FnTy = 547 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 548 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 549 break; 550 } 551 case OMPRTL__kmpc_critical: { 552 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 553 // kmp_critical_name *crit); 554 llvm::Type *TypeParams[] = { 555 getIdentTyPointerTy(), CGM.Int32Ty, 556 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 557 llvm::FunctionType *FnTy = 558 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 559 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 560 break; 561 } 562 case OMPRTL__kmpc_critical_with_hint: { 563 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 564 // kmp_critical_name *crit, uintptr_t hint); 565 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 566 llvm::PointerType::getUnqual(KmpCriticalNameTy), 567 CGM.IntPtrTy}; 568 llvm::FunctionType *FnTy = 569 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 570 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); 571 break; 572 } 573 case OMPRTL__kmpc_threadprivate_register: { 574 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 575 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 576 // typedef void *(*kmpc_ctor)(void *); 577 auto KmpcCtorTy = 578 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 579 /*isVarArg*/ false)->getPointerTo(); 580 // typedef void *(*kmpc_cctor)(void *, void *); 581 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 582 auto KmpcCopyCtorTy = 583 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 584 /*isVarArg*/ false)->getPointerTo(); 585 // typedef void (*kmpc_dtor)(void *); 586 auto KmpcDtorTy = 587 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 
/*isVarArg*/ false) 588 ->getPointerTo(); 589 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 590 KmpcCopyCtorTy, KmpcDtorTy}; 591 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 592 /*isVarArg*/ false); 593 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 594 break; 595 } 596 case OMPRTL__kmpc_end_critical: { 597 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 598 // kmp_critical_name *crit); 599 llvm::Type *TypeParams[] = { 600 getIdentTyPointerTy(), CGM.Int32Ty, 601 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 602 llvm::FunctionType *FnTy = 603 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 604 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 605 break; 606 } 607 case OMPRTL__kmpc_cancel_barrier: { 608 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 609 // global_tid); 610 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 611 llvm::FunctionType *FnTy = 612 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 613 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 614 break; 615 } 616 case OMPRTL__kmpc_barrier: { 617 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 618 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 619 llvm::FunctionType *FnTy = 620 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 621 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 622 break; 623 } 624 case OMPRTL__kmpc_for_static_fini: { 625 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 626 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 627 llvm::FunctionType *FnTy = 628 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 629 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 630 break; 631 } 632 case OMPRTL__kmpc_push_num_threads: { 633 // Build 
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 634 // kmp_int32 num_threads) 635 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 636 CGM.Int32Ty}; 637 llvm::FunctionType *FnTy = 638 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 639 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 640 break; 641 } 642 case OMPRTL__kmpc_serialized_parallel: { 643 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 644 // global_tid); 645 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 646 llvm::FunctionType *FnTy = 647 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 648 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 649 break; 650 } 651 case OMPRTL__kmpc_end_serialized_parallel: { 652 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 653 // global_tid); 654 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 655 llvm::FunctionType *FnTy = 656 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 657 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 658 break; 659 } 660 case OMPRTL__kmpc_flush: { 661 // Build void __kmpc_flush(ident_t *loc); 662 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 663 llvm::FunctionType *FnTy = 664 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 665 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 666 break; 667 } 668 case OMPRTL__kmpc_master: { 669 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 670 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 671 llvm::FunctionType *FnTy = 672 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 673 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 674 break; 675 } 676 case OMPRTL__kmpc_end_master: { 677 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 678 llvm::Type 
*TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 679 llvm::FunctionType *FnTy = 680 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 681 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 682 break; 683 } 684 case OMPRTL__kmpc_omp_taskyield: { 685 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 686 // int end_part); 687 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 688 llvm::FunctionType *FnTy = 689 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 690 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 691 break; 692 } 693 case OMPRTL__kmpc_single: { 694 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 695 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 696 llvm::FunctionType *FnTy = 697 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 698 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 699 break; 700 } 701 case OMPRTL__kmpc_end_single: { 702 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 703 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 704 llvm::FunctionType *FnTy = 705 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 706 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 707 break; 708 } 709 case OMPRTL__kmpc_omp_task_alloc: { 710 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 711 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 712 // kmp_routine_entry_t *task_entry); 713 assert(KmpRoutineEntryPtrTy != nullptr && 714 "Type kmp_routine_entry_t must be created."); 715 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 716 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 717 // Return void * and then cast to particular kmp_task_t type. 
718 llvm::FunctionType *FnTy = 719 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 720 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 721 break; 722 } 723 case OMPRTL__kmpc_omp_task: { 724 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 725 // *new_task); 726 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 727 CGM.VoidPtrTy}; 728 llvm::FunctionType *FnTy = 729 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 730 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 731 break; 732 } 733 case OMPRTL__kmpc_copyprivate: { 734 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 735 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 736 // kmp_int32 didit); 737 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 738 auto *CpyFnTy = 739 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 740 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 741 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 742 CGM.Int32Ty}; 743 llvm::FunctionType *FnTy = 744 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 745 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 746 break; 747 } 748 case OMPRTL__kmpc_reduce: { 749 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 750 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 751 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 752 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 753 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 754 /*isVarArg=*/false); 755 llvm::Type *TypeParams[] = { 756 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 757 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 758 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 759 llvm::FunctionType *FnTy = 760 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 761 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 762 break; 763 } 764 case OMPRTL__kmpc_reduce_nowait: { 765 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 766 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 767 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 768 // *lck); 769 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 770 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 771 /*isVarArg=*/false); 772 llvm::Type *TypeParams[] = { 773 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 774 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 775 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 776 llvm::FunctionType *FnTy = 777 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 778 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 779 break; 780 } 781 case OMPRTL__kmpc_end_reduce: { 782 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 783 // kmp_critical_name *lck); 784 llvm::Type *TypeParams[] = { 785 getIdentTyPointerTy(), CGM.Int32Ty, 786 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 787 llvm::FunctionType *FnTy = 788 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 789 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 790 break; 791 } 792 case OMPRTL__kmpc_end_reduce_nowait: { 793 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 794 // kmp_critical_name *lck); 795 llvm::Type *TypeParams[] = { 796 getIdentTyPointerTy(), CGM.Int32Ty, 797 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 798 llvm::FunctionType *FnTy = 799 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 800 RTLFn = 801 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 802 break; 803 } 804 case OMPRTL__kmpc_omp_task_begin_if0: { 805 
// Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 806 // *new_task); 807 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 808 CGM.VoidPtrTy}; 809 llvm::FunctionType *FnTy = 810 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 811 RTLFn = 812 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 813 break; 814 } 815 case OMPRTL__kmpc_omp_task_complete_if0: { 816 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 817 // *new_task); 818 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 819 CGM.VoidPtrTy}; 820 llvm::FunctionType *FnTy = 821 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 822 RTLFn = CGM.CreateRuntimeFunction(FnTy, 823 /*Name=*/"__kmpc_omp_task_complete_if0"); 824 break; 825 } 826 case OMPRTL__kmpc_ordered: { 827 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 828 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 829 llvm::FunctionType *FnTy = 830 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 831 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 832 break; 833 } 834 case OMPRTL__kmpc_end_ordered: { 835 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 836 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 837 llvm::FunctionType *FnTy = 838 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 839 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 840 break; 841 } 842 case OMPRTL__kmpc_omp_taskwait: { 843 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 844 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 845 llvm::FunctionType *FnTy = 846 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 847 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 848 break; 849 } 850 case OMPRTL__kmpc_taskgroup: { 851 // Build void __kmpc_taskgroup(ident_t 
*loc, kmp_int32 global_tid); 852 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 853 llvm::FunctionType *FnTy = 854 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 855 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 856 break; 857 } 858 case OMPRTL__kmpc_end_taskgroup: { 859 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 860 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 861 llvm::FunctionType *FnTy = 862 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 863 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 864 break; 865 } 866 case OMPRTL__kmpc_push_proc_bind: { 867 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 868 // int proc_bind) 869 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 870 llvm::FunctionType *FnTy = 871 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 872 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 873 break; 874 } 875 case OMPRTL__kmpc_omp_task_with_deps: { 876 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 877 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 878 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 879 llvm::Type *TypeParams[] = { 880 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 881 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 882 llvm::FunctionType *FnTy = 883 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 884 RTLFn = 885 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 886 break; 887 } 888 case OMPRTL__kmpc_omp_wait_deps: { 889 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 890 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 891 // kmp_depend_info_t *noalias_dep_list); 892 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 893 
                                CGM.Int32Ty, CGM.VoidPtrTy,
                                CGM.Int32Ty, CGM.VoidPtrTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
    break;
  }
  case OMPRTL__kmpc_cancellationpoint: {
    // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
    // global_tid, kmp_int32 cncl_kind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
    break;
  }
  case OMPRTL__kmpc_cancel: {
    // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 cncl_kind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
    break;
  }
  case OMPRTL__tgt_target: {
    // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
    // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int32Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.SizeTy->getPointerTo(),
                                CGM.Int32Ty->getPointerTo()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
    break;
  }
  }
  return RTLFn;
}

/// \brief Compute the size in bytes of \a Ty as an llvm::Value of size_t
/// type. For variable-length array types (whose static size is 0) the VLA
/// dimensions are multiplied out at run time.
static llvm::Value *getTypeSize(CodeGenFunction &CGF, QualType Ty) {
  auto &C = CGF.getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      // Peel one VLA level: accumulate the dynamic element count and step
      // down to the element type.
      std::tie(ArraySize, Ty) = CGF.getVLASize(VAT);
      Size = Size ? CGF.Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    assert(!SizeInChars.isZero());
    // NOTE(review): if the zero-sized type is not a VLA, the loop above never
    // runs and Size is still null here — presumably callers only pass VLAs or
    // non-zero-sized types; confirm at call sites.
    Size = CGF.Builder.CreateNUWMul(
        Size, llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity()));
  } else
    Size = llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity());
  return Size;
}

llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
                                                             bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  // Select the entry point matching the induction-variable width/signedness:
  // __kmpc_for_static_init_{4,4u,8,8u}.
  auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                       : "__kmpc_for_static_init_4u")
                           : (IVSigned ? "__kmpc_for_static_init_8"
                                       : "__kmpc_for_static_init_8u");
  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    CGM.Int32Ty,                               // schedtype
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy,                                     // p_stride
    ITy,                                       // incr
    ITy                                        // chunk
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  // __kmpc_dispatch_init_{4,4u,8,8u}.
  auto Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  // __kmpc_dispatch_fini_{4,4u,8,8u}.
  auto Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
                                                            bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  // __kmpc_dispatch_next_{4,4u,8,8u}.
  auto Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy                                      // p_stride
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  // The runtime cache is only used when native TLS is not in play.
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
                                     Twine(CGM.getMangledName(VD)) + ".cache.");
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  // With native TLS the variable's own address is already per-thread.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  auto VarTy = VDAddr.getElementType();
  // Otherwise ask the runtime for this thread's copy:
  // __kmpc_threadprivate_cached(loc, gtid, &var, size, &cache).
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
                 VDAddr.getAlignment());
}

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  auto OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {OMPLoc,
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.VoidPtrTy),
                         Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS support the runtime registration machinery is not needed.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  // Only emit ctor/dtor registration once, for the defining declaration.
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
    ThreadPrivateWithDefinition.insert(VD);
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    auto Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
          /*isVariadic=*/false);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_ctor_.", FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, SourceLocation());
      auto ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
                                                 CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns its void* argument (re-loaded here) to the runtime.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
          CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
          /*isVariadic=*/false);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_dtor_.", FI, Loc);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            SourceLocation());
      auto ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto CopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                /*isVarArg=*/false)->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // The runtime expects typed null pointers for any hook that is not needed.
    if (Ctor == nullptr) {
      auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No active function to emit into: create a standalone global
      // initializer that performs the registration at startup.
      auto InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, ".__omp_threadprivate_init_.",
          CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
/// function.
Here is the logic: 1208 /// if (Cond) { 1209 /// ThenGen(); 1210 /// } else { 1211 /// ElseGen(); 1212 /// } 1213 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 1214 const RegionCodeGenTy &ThenGen, 1215 const RegionCodeGenTy &ElseGen) { 1216 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 1217 1218 // If the condition constant folds and can be elided, try to avoid emitting 1219 // the condition and the dead arm of the if/else. 1220 bool CondConstant; 1221 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 1222 CodeGenFunction::RunCleanupsScope Scope(CGF); 1223 if (CondConstant) { 1224 ThenGen(CGF); 1225 } else { 1226 ElseGen(CGF); 1227 } 1228 return; 1229 } 1230 1231 // Otherwise, the condition did not fold, or we couldn't elide it. Just 1232 // emit the conditional branch. 1233 auto ThenBlock = CGF.createBasicBlock("omp_if.then"); 1234 auto ElseBlock = CGF.createBasicBlock("omp_if.else"); 1235 auto ContBlock = CGF.createBasicBlock("omp_if.end"); 1236 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 1237 1238 // Emit the 'then' code. 1239 CGF.EmitBlock(ThenBlock); 1240 { 1241 CodeGenFunction::RunCleanupsScope ThenScope(CGF); 1242 ThenGen(CGF); 1243 } 1244 CGF.EmitBranch(ContBlock); 1245 // Emit the 'else' code if present. 1246 { 1247 // There is no need to emit line number for unconditional branch. 1248 auto NL = ApplyDebugLocation::CreateEmpty(CGF); 1249 CGF.EmitBlock(ElseBlock); 1250 } 1251 { 1252 CodeGenFunction::RunCleanupsScope ThenScope(CGF); 1253 ElseGen(CGF); 1254 } 1255 { 1256 // There is no need to emit line number for unconditional branch. 1257 auto NL = ApplyDebugLocation::CreateEmpty(CGF); 1258 CGF.EmitBranch(ContBlock); 1259 } 1260 // Emit the continuation block for code after the if. 
1261 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 1262 } 1263 1264 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 1265 llvm::Value *OutlinedFn, 1266 ArrayRef<llvm::Value *> CapturedVars, 1267 const Expr *IfCond) { 1268 if (!CGF.HaveInsertPoint()) 1269 return; 1270 auto *RTLoc = emitUpdateLocation(CGF, Loc); 1271 auto &&ThenGen = [this, OutlinedFn, CapturedVars, 1272 RTLoc](CodeGenFunction &CGF) { 1273 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 1274 llvm::Value *Args[] = { 1275 RTLoc, 1276 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 1277 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 1278 llvm::SmallVector<llvm::Value *, 16> RealArgs; 1279 RealArgs.append(std::begin(Args), std::end(Args)); 1280 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 1281 1282 auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call); 1283 CGF.EmitRuntimeCall(RTLFn, RealArgs); 1284 }; 1285 auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc, 1286 Loc](CodeGenFunction &CGF) { 1287 auto ThreadID = getThreadID(CGF, Loc); 1288 // Build calls: 1289 // __kmpc_serialized_parallel(&Loc, GTid); 1290 llvm::Value *Args[] = {RTLoc, ThreadID}; 1291 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), 1292 Args); 1293 1294 // OutlinedFn(>id, &zero, CapturedStruct); 1295 auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc); 1296 Address ZeroAddr = 1297 CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), 1298 /*Name*/ ".zero.addr"); 1299 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 1300 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 1301 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 1302 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 1303 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 1304 CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); 1305 1306 // __kmpc_end_serialized_parallel(&Loc, GTid); 1307 
llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID}; 1308 CGF.EmitRuntimeCall( 1309 createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs); 1310 }; 1311 if (IfCond) { 1312 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 1313 } else { 1314 CodeGenFunction::RunCleanupsScope Scope(CGF); 1315 ThenGen(CGF); 1316 } 1317 } 1318 1319 // If we're inside an (outlined) parallel region, use the region info's 1320 // thread-ID variable (it is passed in a first argument of the outlined function 1321 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 1322 // regular serial code region, get thread ID by calling kmp_int32 1323 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 1324 // return the address of that temp. 1325 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 1326 SourceLocation Loc) { 1327 if (auto OMPRegionInfo = 1328 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 1329 if (OMPRegionInfo->getThreadIDVariable()) 1330 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 1331 1332 auto ThreadID = getThreadID(CGF, Loc); 1333 auto Int32Ty = 1334 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 1335 auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 1336 CGF.EmitStoreOfScalar(ThreadID, 1337 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 1338 1339 return ThreadIDTemp; 1340 } 1341 1342 llvm::Constant * 1343 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, 1344 const llvm::Twine &Name) { 1345 SmallString<256> Buffer; 1346 llvm::raw_svector_ostream Out(Buffer); 1347 Out << Name; 1348 auto RuntimeName = Out.str(); 1349 auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; 1350 if (Elem.second) { 1351 assert(Elem.second->getType()->getPointerElementType() == Ty && 1352 "OMP internal variable has different type than requested"); 1353 return &*Elem.second; 1354 } 1355 
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first());
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  // The lock variable is named after the critical section so all regions with
  // the same name share one lock (common linkage merges duplicates).
  llvm::Twine Name(".gomp_critical_user_", CriticalName);
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
}

namespace {
/// \brief Cleanup that emits a call to \a Callee with N pre-captured
/// arguments on scope exit, both on the normal and the EH path.
template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup {
  llvm::Value *Callee;
  llvm::Value *Args[N];

public:
  CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs)
      : Callee(Callee) {
    assert(CleanupArgs.size() == N);
    std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(Callee, Args);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  if (Hint) {
    // OpenMP 4.5 'hint' clause: pass the evaluated hint as an extra argument.
    llvm::SmallVector<llvm::Value *, 8> ArgsWithHint(std::begin(Args),
                                                     std::end(Args));
    auto *HintVal = CGF.EmitScalarExpr(Hint);
    ArgsWithHint.push_back(
        CGF.Builder.CreateIntCast(HintVal, CGM.IntPtrTy, /*isSigned=*/false));
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical_with_hint),
                        ArgsWithHint);
  } else
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
  // Build a call to __kmpc_end_critical
  CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
      NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
      llvm::makeArrayRef(Args));
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// \brief Emit "if (IfCond) { BodyOpGen(); }" around an inlined directive
/// body, where \a IfCond is a runtime i32 truth value.
static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
                       OpenMPDirectiveKind Kind, SourceLocation Loc,
                       const RegionCodeGenTy &BodyOpGen) {
  llvm::Value *CallBool = CGF.EmitScalarConversion(
      IfCond,
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
      CGF.getContext().BoolTy, Loc);

  auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
  auto *ContBlock = CGF.createBasicBlock("omp_if.end");
  // Generate the branch (If-stmt)
  CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
  CGF.EmitBlock(ThenBlock);
  CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen);
  // Emit the rest of bblocks/branches
  CGF.EmitBranch(ContBlock);
  CGF.EmitBlock(ContBlock, true);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  auto *IsMaster =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
  typedef CallEndCleanup<std::extent<decltype(Args)>::value>
      MasterCallEndCleanup;
  emitIfStmt(
      CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void {
        CodeGenFunction::RunCleanupsScope Scope(CGF);
        // __kmpc_end_master runs when the guarded scope exits.
        CGF.EHStack.pushCleanup<MasterCallEndCleanup>(
            NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
            llvm::makeArrayRef(Args));
        MasterOpGen(CGF);
      });
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args);
    // Build a call to __kmpc_end_taskgroup
    CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
        NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
        llvm::makeArrayRef(Args));
    emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
  }
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr =
      CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  // Cast the loaded void* to the variable's memory type, using its
  // declared alignment.
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// \brief Emit the helper "void copy_func(void *LHSArg, void *RHSArg)" used
/// by __kmpc_copyprivate: both arguments are arrays of pointers to the
/// copyprivate variables, and each element is copied with the corresponding
/// assignment operation.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
  auto &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  FunctionType::ExtInfo EI;
  auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
      C.VoidTy, Args, EI, /*isVariadic=*/false);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.copyprivate.copy_func", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  auto &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  auto *IsSingle =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
  typedef CallEndCleanup<std::extent<decltype(Args)>::value>
      SingleCallEndCleanup;
  emitIfStmt(
      CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void {
        CodeGenFunction::RunCleanupsScope Scope(CGF);
        // __kmpc_end_single runs when the guarded scope exits.
        CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
            NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
            llvm::makeArrayRef(Args));
        SingleOpGen(CGF);
        if (DidIt.isValid()) {
          // did_it = 1;
          CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
        }
      });
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    auto CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(
          CopyprivateList, I, CGF.getPointerSize());
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    auto *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
    auto *BufSize = getTypeSize(CGF, CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
    // Build a call to __kmpc_end_ordered
    CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
        NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
        llvm::makeArrayRef(Args));
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call
__kmpc_barrier(loc, thread_id); 1671 OpenMPLocationFlags Flags = OMP_IDENT_KMPC; 1672 if (Kind == OMPD_for) { 1673 Flags = 1674 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR); 1675 } else if (Kind == OMPD_sections) { 1676 Flags = static_cast<OpenMPLocationFlags>(Flags | 1677 OMP_IDENT_BARRIER_IMPL_SECTIONS); 1678 } else if (Kind == OMPD_single) { 1679 Flags = 1680 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE); 1681 } else if (Kind == OMPD_barrier) { 1682 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL); 1683 } else { 1684 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL); 1685 } 1686 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 1687 // thread_id); 1688 auto *OMPRegionInfo = 1689 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 1690 // Do not emit barrier call in the single directive emitted in some rare cases 1691 // for sections directives. 1692 if (OMPRegionInfo && OMPRegionInfo->getDirectiveKind() == OMPD_single) 1693 return; 1694 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 1695 getThreadID(CGF, Loc)}; 1696 if (OMPRegionInfo) { 1697 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 1698 auto *Result = CGF.EmitRuntimeCall( 1699 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 1700 if (EmitChecks) { 1701 // if (__kmpc_cancel_barrier()) { 1702 // exit from construct; 1703 // } 1704 auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); 1705 auto *ContBB = CGF.createBasicBlock(".cancel.continue"); 1706 auto *Cmp = CGF.Builder.CreateIsNotNull(Result); 1707 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 1708 CGF.EmitBlock(ExitBB); 1709 // exit from construct; 1710 auto CancelDestination = 1711 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 1712 CGF.EmitBranchThroughCleanup(CancelDestination); 1713 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 1714 } 1715 return; 1716 } 1717 } 1718 
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 1719 } 1720 1721 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from 1722 /// the enum sched_type in kmp.h). 1723 enum OpenMPSchedType { 1724 /// \brief Lower bound for default (unordered) versions. 1725 OMP_sch_lower = 32, 1726 OMP_sch_static_chunked = 33, 1727 OMP_sch_static = 34, 1728 OMP_sch_dynamic_chunked = 35, 1729 OMP_sch_guided_chunked = 36, 1730 OMP_sch_runtime = 37, 1731 OMP_sch_auto = 38, 1732 /// \brief Lower bound for 'ordered' versions. 1733 OMP_ord_lower = 64, 1734 OMP_ord_static_chunked = 65, 1735 OMP_ord_static = 66, 1736 OMP_ord_dynamic_chunked = 67, 1737 OMP_ord_guided_chunked = 68, 1738 OMP_ord_runtime = 69, 1739 OMP_ord_auto = 70, 1740 OMP_sch_default = OMP_sch_static, 1741 }; 1742 1743 /// \brief Map the OpenMP loop schedule to the runtime enumeration. 1744 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 1745 bool Chunked, bool Ordered) { 1746 switch (ScheduleKind) { 1747 case OMPC_SCHEDULE_static: 1748 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 1749 : (Ordered ? OMP_ord_static : OMP_sch_static); 1750 case OMPC_SCHEDULE_dynamic: 1751 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 1752 case OMPC_SCHEDULE_guided: 1753 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 1754 case OMPC_SCHEDULE_runtime: 1755 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 1756 case OMPC_SCHEDULE_auto: 1757 return Ordered ? OMP_ord_auto : OMP_sch_auto; 1758 case OMPC_SCHEDULE_unknown: 1759 assert(!Chunked && "chunk was specified but schedule kind not known"); 1760 return Ordered ? 
                     OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// \brief Returns true if the given clause maps to the runtime's plain
/// (non-chunked, unordered) static schedule.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

/// \brief Returns true for any schedule kind that requires the dynamic
/// dispatch runtime entry points (i.e. anything but plain static).
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  auto Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// \brief Emit the __kmpc_dispatch_init_* call that starts a dynamically
/// scheduled (or ordered) worksharing loop. IVSize/IVSigned select the
/// 4/8-byte signed/unsigned runtime variant; a null Chunk defaults to 1.
void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPScheduleClauseKind ScheduleKind,
                                          unsigned IVSize, bool IVSigned,
                                          bool Ordered, llvm::Value *UB,
                                          llvm::Value *Chunk) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
  // Static unordered schedules are handled by emitForStaticInit instead.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  if (Chunk == nullptr)
    Chunk = CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
    emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
    getThreadID(CGF, Loc),
    CGF.Builder.getInt32(Schedule), // Schedule type
    CGF.Builder.getIntN(IVSize, 0), // Lower
    UB,                             // Upper
    CGF.Builder.getIntN(IVSize, 1), // Stride
    Chunk                           // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// \brief Emit the __kmpc_for_static_init_* call for a statically scheduled
/// worksharing loop. IL/LB/UB/ST are in-out pointers the runtime fills with
/// this thread's last-iteration flag, bounds, and stride.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPScheduleClauseKind ScheduleKind,
                                        unsigned IVSize, bool IVSigned,
                                        bool Ordered, Address IL, Address LB,
                                        Address UB, Address ST,
                                        llvm::Value *Chunk) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
  assert(!Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_ord_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
    emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
    getThreadID(CGF, Loc),
    CGF.Builder.getInt32(Schedule), // Schedule type
    IL.getPointer(),                // &isLastIter
    LB.getPointer(),                // &LB
    UB.getPointer(),                // &UB
    ST.getPointer(),                // &Stride
    CGF.Builder.getIntN(IVSize, 1), // Incr
    Chunk                           // Chunk
  };
  CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
}

/// \brief Emit the call closing a statically scheduled loop region.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
                         getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
                      Args);
}

/// \brief Emit the per-iteration "fini" call for ordered dynamic loops.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
                         getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// \brief Emit __kmpc_dispatch_next_* to fetch the next chunk of a dynamic
/// loop; the kmp_int32 result is converted to bool (true while work remains).
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
      CGF.getContext().BoolTy, Loc);
}

/// \brief Emit the runtime call implementing the 'num_threads' clause.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                      Args);
}

/// \brief Emit the runtime call implementing the 'proc_bind' clause, mapping
/// the clause value to the runtime's kmp_proc_bind_t numbering.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         OpenMPProcBindClauseKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Constants for proc bind value accepted by the runtime.
  enum ProcBindTy {
    ProcBindFalse = 0,
    ProcBindTrue,
    ProcBindMaster,
    ProcBindClose,
    ProcBindSpread,
    ProcBindIntel,
    ProcBindDefault
  } RuntimeProcBind;
  switch (ProcBind) {
  case OMPC_PROC_BIND_master:
    RuntimeProcBind = ProcBindMaster;
    break;
  case OMPC_PROC_BIND_close:
    RuntimeProcBind = ProcBindClose;
    break;
  case OMPC_PROC_BIND_spread:
    RuntimeProcBind = ProcBindSpread;
    break;
  case OMPC_PROC_BIND_unknown:
    llvm_unreachable("Unsupported proc_bind value.");
  }
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}

/// \brief Emit the 'flush' construct. The variable list argument is unused:
/// __kmpc_flush takes only the location.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call void __kmpc_flush(ident_t *loc)
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                      emitUpdateLocation(CGF, Loc));
}

namespace {
/// \brief Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// \brief List of shared variables.
  KmpTaskTShareds,
  /// \brief Task routine.
  KmpTaskTRoutine,
  /// \brief Partition id for the untied tasks.
  KmpTaskTPartId,
  /// \brief Function with call of destructors for private variables.
  KmpTaskTDestructors,
};
} // anonymous namespace

/// \brief Lazily build the kmp_routine_entry_t pointer type (both the QualType
/// and the converted LLVM type), caching the result in the member fields.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    auto &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

/// \brief Append a public, implicit field of the given type to record DC.
/// \return The created FieldDecl.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

namespace {
/// \brief Triple describing one task-private variable: the original captured
/// variable, its private copy, and (for firstprivate) the element used to
/// reference the source value during initialization.
struct PrivateHelpersTy {
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
                   const VarDecl *PrivateElemInit)
      : Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  const VarDecl *Original;
  const VarDecl *PrivateCopy;
  const VarDecl *PrivateElemInit;
};
// Pairs the variable's alignment with its helper triple; used for sorting
// privates by alignment before record layout.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// \brief Build the implicit record '.kmp_privates.t' holding one field per
/// task-private variable (reference types are stored by value); AlignedAttr
/// attributes are propagated from the original variable. Returns nullptr when
/// there are no privates.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    auto &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    auto *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (auto &&Pair : Privates) {
      auto *VD = Pair.second.Original;
      auto Type = VD->getType();
      Type = Type.getNonReferenceType();
      auto *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

/// \brief Build the implicit record mirroring the runtime's kmp_task_t
/// header. Field order must match the KmpTaskTFields enum.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  auto &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_routine_entry_t destructors;
  //       };
  auto *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  RD->completeDefinition();
  return RD;
}

/// \brief Build the record wrapping the kmp_task_t header plus (optionally)
/// the '.kmp_privates.t' record when the task has private variables.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  auto &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  }
  RD->completeDefinition();
  return RD;
}

/// \brief Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
///   tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Value *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Value *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
                                /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict());
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  FunctionType::ExtInfo Info;
  auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
                                                    /*isVariadic=*/false);
  auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  auto *TaskEntry =
      llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_entry.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt->task_data.shareds);
  auto *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = emitLoadOfPointerLValue(
      CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
  auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // First field of the wrapper record is the kmp_task_t header.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Second field, if present, is the privates record; pass its address (or a
  // null void* when the task has no privates).
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
                             TaskPrivatesMap, SharedsParam};
  CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
  CGF.EmitStoreThroughLValue(
      RValue::get(CGF.Builder.getInt32(/*C=*/0)),
      CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// \brief Emit the ".omp_task_destructor." function that runs the destructor
/// of every destructible field in the task's privates record.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
                                /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict());
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  FunctionType::ExtInfo Info;
  auto &DestructorFnInfo =
      CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
                                                    /*isVariadic=*/false);
  auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             ".omp_task_destructor.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
                                    DestructorFnInfo);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args);

  LValue Base = emitLoadOfPointerLValue(
      CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
  auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Second field of the wrapper is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (auto DtorKind = Field->getType().isDestructedType()) {
      auto FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// \brief Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               ArrayRef<const Expr *> PrivateVars,
                               ArrayRef<const Expr *> FirstprivateVars,
                               QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  auto &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict());
  Args.push_back(&TaskPrivatesArg);
  // Remember the 1-based argument position of each private/firstprivate
  // variable so fields can be matched to parameters below.
  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (auto *E : PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc,
        /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
                            .withConst()
                            .withRestrict()));
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (auto *E : FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc,
        /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
                            .withConst()
                            .withRestrict()));
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  FunctionType::ExtInfo Info;
  auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info,
                                                    /*isVariadic=*/false);
  auto *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
      ".omp_task_privates_map.", &CGM.getModule());
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  CodeGenFunction CGF(CGM);
  CGF.disableDebugInfo();
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args);

  // *privi = &.privates.privi;
  LValue Base = emitLoadOfPointerLValue(
      CGF, CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType());
  auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (auto *Field : PrivatesQTyRD->fields()) {
    auto FieldLVal = CGF.EmitLValueForField(Base, Field);
    auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    auto RefLoadLVal =
        emitLoadOfPointerLValue(CGF, RefLVal.getAddress(), RefLVal.getType());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// \brief Comparator for llvm::array_pod_sort over PrivateDataTy: sorts by
/// descending alignment (note the inverted 1/-1 results), so the most
/// strictly aligned privates are laid out first in '.kmp_privates.t'.
static int array_pod_sort_comparator(const PrivateDataTy *P1,
                                     const PrivateDataTy *P2) {
  return P1->first < P2->first ? 1 : (P2->first < P1->first ?
-1 : 0); 2277 } 2278 2279 void CGOpenMPRuntime::emitTaskCall( 2280 CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, 2281 bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, 2282 llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, 2283 const Expr *IfCond, ArrayRef<const Expr *> PrivateVars, 2284 ArrayRef<const Expr *> PrivateCopies, 2285 ArrayRef<const Expr *> FirstprivateVars, 2286 ArrayRef<const Expr *> FirstprivateCopies, 2287 ArrayRef<const Expr *> FirstprivateInits, 2288 ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) { 2289 if (!CGF.HaveInsertPoint()) 2290 return; 2291 auto &C = CGM.getContext(); 2292 llvm::SmallVector<PrivateDataTy, 8> Privates; 2293 // Aggregate privates and sort them by the alignment. 2294 auto I = PrivateCopies.begin(); 2295 for (auto *E : PrivateVars) { 2296 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2297 Privates.push_back(std::make_pair( 2298 C.getDeclAlign(VD), 2299 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 2300 /*PrivateElemInit=*/nullptr))); 2301 ++I; 2302 } 2303 I = FirstprivateCopies.begin(); 2304 auto IElemInitRef = FirstprivateInits.begin(); 2305 for (auto *E : FirstprivateVars) { 2306 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2307 Privates.push_back(std::make_pair( 2308 C.getDeclAlign(VD), 2309 PrivateHelpersTy( 2310 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 2311 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())))); 2312 ++I, ++IElemInitRef; 2313 } 2314 llvm::array_pod_sort(Privates.begin(), Privates.end(), 2315 array_pod_sort_comparator); 2316 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2317 // Build type kmp_routine_entry_t (if not built yet). 2318 emitKmpRoutineEntryT(KmpInt32Ty); 2319 // Build type kmp_task_t (if not built yet). 
2320 if (KmpTaskTQTy.isNull()) { 2321 KmpTaskTQTy = C.getRecordType( 2322 createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy)); 2323 } 2324 auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 2325 // Build particular struct kmp_task_t for the given task. 2326 auto *KmpTaskTWithPrivatesQTyRD = 2327 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 2328 auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 2329 QualType KmpTaskTWithPrivatesPtrQTy = 2330 C.getPointerType(KmpTaskTWithPrivatesQTy); 2331 auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 2332 auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo(); 2333 auto *KmpTaskTWithPrivatesTySize = getTypeSize(CGF, KmpTaskTWithPrivatesQTy); 2334 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 2335 2336 // Emit initial values for private copies (if any). 2337 llvm::Value *TaskPrivatesMap = nullptr; 2338 auto *TaskPrivatesMapTy = 2339 std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(), 2340 3) 2341 ->getType(); 2342 if (!Privates.empty()) { 2343 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 2344 TaskPrivatesMap = emitTaskPrivateMappingFunction( 2345 CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates); 2346 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2347 TaskPrivatesMap, TaskPrivatesMapTy); 2348 } else { 2349 TaskPrivatesMap = llvm::ConstantPointerNull::get( 2350 cast<llvm::PointerType>(TaskPrivatesMapTy)); 2351 } 2352 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 2353 // kmp_task_t *tt); 2354 auto *TaskEntry = emitProxyTaskFunction( 2355 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy, 2356 KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap); 2357 2358 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 2359 // kmp_int32 flags, size_t sizeof_kmp_task_t, 
size_t sizeof_shareds, 2360 // kmp_routine_entry_t *task_entry); 2361 // Task flags. Format is taken from 2362 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, 2363 // description of kmp_tasking_flags struct. 2364 const unsigned TiedFlag = 0x1; 2365 const unsigned FinalFlag = 0x2; 2366 unsigned Flags = Tied ? TiedFlag : 0; 2367 auto *TaskFlags = 2368 Final.getPointer() 2369 ? CGF.Builder.CreateSelect(Final.getPointer(), 2370 CGF.Builder.getInt32(FinalFlag), 2371 CGF.Builder.getInt32(/*C=*/0)) 2372 : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0); 2373 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 2374 auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 2375 llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), 2376 getThreadID(CGF, Loc), TaskFlags, 2377 KmpTaskTWithPrivatesTySize, SharedsSize, 2378 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2379 TaskEntry, KmpRoutineEntryPtrTy)}; 2380 auto *NewTask = CGF.EmitRuntimeCall( 2381 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 2382 auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2383 NewTask, KmpTaskTWithPrivatesPtrTy); 2384 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 2385 KmpTaskTWithPrivatesQTy); 2386 LValue TDBase = 2387 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 2388 // Fill the data in the resulting kmp_task_t record. 2389 // Copy shareds if there are any. 2390 Address KmpTaskSharedsPtr = Address::invalid(); 2391 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 2392 KmpTaskSharedsPtr = 2393 Address(CGF.EmitLoadOfScalar( 2394 CGF.EmitLValueForField( 2395 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 2396 KmpTaskTShareds)), 2397 Loc), 2398 CGF.getNaturalTypeAlignment(SharedsTy)); 2399 CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy); 2400 } 2401 // Emit initial values for private copies (if any). 
2402 bool NeedsCleanup = false; 2403 if (!Privates.empty()) { 2404 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 2405 auto PrivatesBase = CGF.EmitLValueForField(Base, *FI); 2406 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 2407 LValue SharedsBase; 2408 if (!FirstprivateVars.empty()) { 2409 SharedsBase = CGF.MakeAddrLValue( 2410 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2411 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 2412 SharedsTy); 2413 } 2414 CodeGenFunction::CGCapturedStmtInfo CapturesInfo( 2415 cast<CapturedStmt>(*D.getAssociatedStmt())); 2416 for (auto &&Pair : Privates) { 2417 auto *VD = Pair.second.PrivateCopy; 2418 auto *Init = VD->getAnyInitializer(); 2419 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 2420 if (Init) { 2421 if (auto *Elem = Pair.second.PrivateElemInit) { 2422 auto *OriginalVD = Pair.second.Original; 2423 auto *SharedField = CapturesInfo.lookup(OriginalVD); 2424 auto SharedRefLValue = 2425 CGF.EmitLValueForField(SharedsBase, SharedField); 2426 SharedRefLValue = CGF.MakeAddrLValue( 2427 Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), 2428 SharedRefLValue.getType(), AlignmentSource::Decl); 2429 QualType Type = OriginalVD->getType(); 2430 if (Type->isArrayType()) { 2431 // Initialize firstprivate array. 2432 if (!isa<CXXConstructExpr>(Init) || 2433 CGF.isTrivialInitializer(Init)) { 2434 // Perform simple memcpy. 2435 CGF.EmitAggregateAssign(PrivateLValue.getAddress(), 2436 SharedRefLValue.getAddress(), Type); 2437 } else { 2438 // Initialize firstprivate array using element-by-element 2439 // intialization. 2440 CGF.EmitOMPAggregateAssign( 2441 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), 2442 Type, [&CGF, Elem, Init, &CapturesInfo]( 2443 Address DestElement, Address SrcElement) { 2444 // Clean up any temporaries needed by the initialization. 
2445 CodeGenFunction::OMPPrivateScope InitScope(CGF); 2446 InitScope.addPrivate(Elem, [SrcElement]() -> Address { 2447 return SrcElement; 2448 }); 2449 (void)InitScope.Privatize(); 2450 // Emit initialization for single element. 2451 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 2452 CGF, &CapturesInfo); 2453 CGF.EmitAnyExprToMem(Init, DestElement, 2454 Init->getType().getQualifiers(), 2455 /*IsInitializer=*/false); 2456 }); 2457 } 2458 } else { 2459 CodeGenFunction::OMPPrivateScope InitScope(CGF); 2460 InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { 2461 return SharedRefLValue.getAddress(); 2462 }); 2463 (void)InitScope.Privatize(); 2464 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 2465 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 2466 /*capturedByInit=*/false); 2467 } 2468 } else { 2469 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 2470 } 2471 } 2472 NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType(); 2473 ++FI; 2474 } 2475 } 2476 // Provide pointer to function with destructors for privates. 2477 llvm::Value *DestructorFn = 2478 NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty, 2479 KmpTaskTWithPrivatesPtrQTy, 2480 KmpTaskTWithPrivatesQTy) 2481 : llvm::ConstantPointerNull::get( 2482 cast<llvm::PointerType>(KmpRoutineEntryPtrTy)); 2483 LValue Destructor = CGF.EmitLValueForField( 2484 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors)); 2485 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2486 DestructorFn, KmpRoutineEntryPtrTy), 2487 Destructor); 2488 2489 // Process list of dependences. 2490 Address DependenciesArray = Address::invalid(); 2491 unsigned NumDependencies = Dependences.size(); 2492 if (NumDependencies) { 2493 // Dependence kind for RTL. 
2494 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 }; 2495 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 2496 RecordDecl *KmpDependInfoRD; 2497 QualType FlagsTy = 2498 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 2499 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 2500 if (KmpDependInfoTy.isNull()) { 2501 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 2502 KmpDependInfoRD->startDefinition(); 2503 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 2504 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 2505 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 2506 KmpDependInfoRD->completeDefinition(); 2507 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 2508 } else { 2509 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 2510 } 2511 CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy); 2512 // Define type kmp_depend_info[<Dependences.size()>]; 2513 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 2514 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), 2515 ArrayType::Normal, /*IndexTypeQuals=*/0); 2516 // kmp_depend_info[<Dependences.size()>] deps; 2517 DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy); 2518 for (unsigned i = 0; i < NumDependencies; ++i) { 2519 const Expr *E = Dependences[i].second; 2520 auto Addr = CGF.EmitLValue(E); 2521 llvm::Value *Size; 2522 QualType Ty = E->getType(); 2523 if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 2524 LValue UpAddrLVal = 2525 CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false); 2526 llvm::Value *UpAddr = 2527 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); 2528 llvm::Value *LowIntPtr = 2529 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); 2530 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 2531 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 2532 } else 2533 Size = getTypeSize(CGF, Ty); 
2534 auto Base = CGF.MakeAddrLValue( 2535 CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize), 2536 KmpDependInfoTy); 2537 // deps[i].base_addr = &<Dependences[i].second>; 2538 auto BaseAddrLVal = CGF.EmitLValueForField( 2539 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 2540 CGF.EmitStoreOfScalar( 2541 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), 2542 BaseAddrLVal); 2543 // deps[i].len = sizeof(<Dependences[i].second>); 2544 auto LenLVal = CGF.EmitLValueForField( 2545 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 2546 CGF.EmitStoreOfScalar(Size, LenLVal); 2547 // deps[i].flags = <Dependences[i].first>; 2548 RTLDependenceKindTy DepKind; 2549 switch (Dependences[i].first) { 2550 case OMPC_DEPEND_in: 2551 DepKind = DepIn; 2552 break; 2553 // Out and InOut dependencies must use the same code. 2554 case OMPC_DEPEND_out: 2555 case OMPC_DEPEND_inout: 2556 DepKind = DepInOut; 2557 break; 2558 case OMPC_DEPEND_source: 2559 case OMPC_DEPEND_unknown: 2560 llvm_unreachable("Unknown task dependence type"); 2561 } 2562 auto FlagsLVal = CGF.EmitLValueForField( 2563 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 2564 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 2565 FlagsLVal); 2566 } 2567 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2568 CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()), 2569 CGF.VoidPtrTy); 2570 } 2571 2572 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 2573 // libcall. 
2574 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2575 // *new_task); 2576 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2577 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2578 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 2579 // list is not empty 2580 auto *ThreadID = getThreadID(CGF, Loc); 2581 auto *UpLoc = emitUpdateLocation(CGF, Loc); 2582 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 2583 llvm::Value *DepTaskArgs[7]; 2584 if (NumDependencies) { 2585 DepTaskArgs[0] = UpLoc; 2586 DepTaskArgs[1] = ThreadID; 2587 DepTaskArgs[2] = NewTask; 2588 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); 2589 DepTaskArgs[4] = DependenciesArray.getPointer(); 2590 DepTaskArgs[5] = CGF.Builder.getInt32(0); 2591 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 2592 } 2593 auto &&ThenCodeGen = [this, NumDependencies, 2594 &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) { 2595 // TODO: add check for untied tasks. 
2596 if (NumDependencies) { 2597 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), 2598 DepTaskArgs); 2599 } else { 2600 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 2601 TaskArgs); 2602 } 2603 }; 2604 typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value> 2605 IfCallEndCleanup; 2606 2607 llvm::Value *DepWaitTaskArgs[6]; 2608 if (NumDependencies) { 2609 DepWaitTaskArgs[0] = UpLoc; 2610 DepWaitTaskArgs[1] = ThreadID; 2611 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); 2612 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 2613 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 2614 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 2615 } 2616 auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 2617 NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) { 2618 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 2619 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2620 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 2621 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 2622 // is specified. 
    // NOTE(review): this fragment is the tail of the serialized-task ("else")
    // generator of the enclosing task-emission function, whose start is above
    // this chunk — confirm against the full definition.
    if (NumDependencies)
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0),
                        TaskArgs);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Pushed as a cleanup so the "complete" call is emitted on both the normal
    // and the exceptional exit paths out of the task body.
    CGF.EHStack.pushCleanup<IfCallEndCleanup>(
        NormalAndEHCleanup,
        createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
        llvm::makeArrayRef(TaskArgs));

    // Call proxy_task_entry(gtid, new_task);
    llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
    CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
  };

  // With an 'if' clause, emit both code paths guarded by the condition;
  // otherwise unconditionally emit the "then" (enqueue) path.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    ThenCodeGen(CGF);
  }
}

/// \brief Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  auto ArrayTy = Type->getAsArrayTypeUnsafe();
  // emitArrayLength also fills in ElementTy with the base element type.
  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  auto RHSBegin = RHSAddr.getPointer();
  auto LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays.
  auto IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // LHS and RHS element pointers advance in lockstep; each is a PHI whose
  // second incoming value is added at the loop latch below.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy: temporarily remap LHSVar/RHSVar onto the current element
  // addresses so RedOpGen's expressions operate element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// \brief Emits the outlined reduction combiner passed to the runtime:
/// void reduction_func(void *lhs[<n>], void *rhs[<n>]) applying each
/// reduction operation to the corresponding lhs/rhs slot.
static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
                                          llvm::Type *ArgsType,
                                          ArrayRef<const Expr *> Privates,
                                          ArrayRef<const Expr *> LHSExprs,
                                          ArrayRef<const Expr *> RHSExprs,
                                          ArrayRef<const Expr *> ReductionOps) {
  auto &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
                           C.VoidPtrTy);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  FunctionType::ExtInfo EI;
  auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
      C.VoidTy, Args, EI, /*isVariadic=*/false);
  auto *Fn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
      ".omp.reduction.reduction_func", &CGM.getModule());
  // The combiner is an internal helper, not a user-visible symbol.
  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Remap each LHS/RHS variable onto its slot of the incoming void* arrays.
  // Idx can run ahead of I: an array-typed private consumes an extra slot
  // that carries the (VLA) element count.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&]() -> Address {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&]() -> Address {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isArrayType()) {
      // Get array size and emit VLA type: bind the VLA size expression to the
      // length stored in the following array slot.
      ++Idx;
      Address Elem =
          CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF,
          cast<OpaqueValueExpr>(
              CGF.getContext().getAsVariableArrayType(PrivTy)->getSizeExpr()),
          RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit every reduction operation; array-typed privates get an element-wise
  // loop, everything else is emitted directly.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (auto *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                [=](CodeGenFunction &CGF, const Expr *,
                                    const Expr *,
                                    const Expr *) { CGF.EmitIgnoredExpr(E); });
    } else
      // Emit reduction for array subscript or single variable.
      CGF.EmitIgnoredExpr(E);
    ++IPriv, ++ILHS, ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// \brief Emits code for the OpenMP 'reduction' clause: builds the RedList of
/// variable addresses, the combiner function, and the __kmpc_reduce{_nowait}
/// call/switch protocol (or just the plain reduction ops when SimpleReduction
/// is set).
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    bool WithNowait, bool SimpleReduction) {
  if (!CGF.HaveInsertPoint())
    return;
  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...

  auto &C = CGM.getContext();

  // Simple reduction: no runtime protocol, just emit the reduction ops.
  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (auto *E : ReductionOps) {
      if ((*IPriv)->getType()->isArrayType()) {
        auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
        EmitOMPAggregateReduction(
            CGF, (*IPriv)->getType(), LHSVar, RHSVar,
            [=](CodeGenFunction &CGF, const Expr *, const Expr *,
                const Expr *) { CGF.EmitIgnoredExpr(E); });
      } else
        CGF.EmitIgnoredExpr(E);
      ++IPriv, ++ILHS, ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (auto *E : Privates) {
    if (E->getType()->isArrayType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem =
      CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isArrayType()) {
      // Store array size in the extra slot reserved above; reduce_func reads
      // it back to rebuild the VLA type.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
                                             CGF.getPointerSize());
      CGF.Builder.CreateStore(
          CGF.Builder.CreateIntToPtr(
              CGF.Builder.CreateIntCast(
                  CGF.getVLASize(CGF.getContext().getAsVariableArrayType(
                                     (*IPriv)->getType()))
                      .first,
                  CGF.SizeTy, /*isSigned=*/false),
              CGF.VoidPtrTy),
          Elem);
    }
  }

  // 2. Emit reduce_func().
  auto *ReductionFn = emitReductionFunction(
      CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  auto *Lock = getCriticalRegionLock(".reduction");

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  auto *IdentTLoc = emitUpdateLocation(
      CGF, Loc,
      static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
  auto *ThreadId = getThreadID(CGF, Loc);
  auto *ReductionArrayTySize = getTypeSize(CGF, ReductionArrayTy);
  auto *RL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
                                                      CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  // Res selects the strategy the runtime chose: 1 = direct reduction,
  // 2 = atomic/critical reduction, anything else = nothing to do here.
  auto Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
    // as a cleanup so it runs on both normal and EH exits from the case body.
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CGF.EHStack
        .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
            NormalAndEHCleanup,
            createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                             : OMPRTL__kmpc_end_reduce),
            llvm::makeArrayRef(EndArgs));
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (auto *E : ReductionOps) {
      if ((*IPriv)->getType()->isArrayType()) {
        // Emit reduction for array section.
        auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
        EmitOMPAggregateReduction(
            CGF, (*IPriv)->getType(), LHSVar, RHSVar,
            [=](CodeGenFunction &CGF, const Expr *, const Expr *,
                const Expr *) { CGF.EmitIgnoredExpr(E); });
      } else
        // Emit reduction for array subscript or single variable.
        CGF.EmitIgnoredExpr(E);
      ++IPriv, ++ILHS, ++IRHS;
    }
  }

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    if (!WithNowait) {
      // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
      llvm::Value *EndArgs[] = {
          IdentTLoc, // ident_t *<loc>
          ThreadId,  // i32 <gtid>
          Lock       // kmp_critical_name *&<lock>
      };
      CGF.EHStack
          .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
              NormalAndEHCleanup,
              createRuntimeFunction(OMPRTL__kmpc_end_reduce),
              llvm::makeArrayRef(EndArgs));
    }
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (auto *E : ReductionOps) {
      // Try to decompose 'x = x op e' so it can be emitted as a single
      // atomic update; anything that does not match falls back to a
      // critical region below.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // NOTE: the inner 'BO' deliberately shadows the outer opcode variable.
      if (auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      auto *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [this, BO, VD, IPriv,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
              [&CGF, UpExpr, VD, IPriv](RValue XRValue) {
                // Fallback path of the atomic helper: materialize the old
                // value of X into a temporary bound to VD, then re-evaluate
                // the update expression against it.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(VD, [&CGF, VD, XRValue]() -> Address {
                  Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                  CGF.EmitStoreThroughLValue(
                      XRValue, CGF.MakeAddrLValue(LHSTemp, VD->getType()));
                  return LHSTemp;
                });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
      } else {
        // Emit as a critical region: the reduction op could not be decomposed
        // into a simple atomic update.
        auto &&CritRedGen = [this, E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          emitCriticalRegion(
              CGF, ".atomic_reduction",
              [E](CodeGenFunction &CGF) { CGF.EmitIgnoredExpr(E); }, Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else
          CritRedGen(CGF, nullptr, nullptr, nullptr);
      }
      ++ILHS, ++IRHS, ++IPriv;
    }
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// \brief Emits a __kmpc_omp_taskwait call for the 'taskwait' directive.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Ignore return result until untied tasks are supported.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
}

/// \brief Emits an inlined (non-outlined) OpenMP region: installs an inlined
/// region RAII and emits the region body in place.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
// Numeric cancellation kinds passed as the cncl_kind argument of the
// __kmpc_cancel/__kmpc_cancellationpoint runtime entry points.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
}

/// \brief Maps an OpenMP cancel region kind onto the runtime's RTCancelKind.
/// Asserts on anything other than parallel/for/sections/taskgroup.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

/// \brief Emits code for the 'cancellation point' directive: calls the
/// runtime and, if cancellation was requested, emits a barrier and a branch
/// out of the construct.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Nothing to do inside a 'single' region.
    if (OMPRegionInfo->getDirectiveKind() == OMPD_single)
      return;
    // Only emit the check when the enclosing region actually has a 'cancel'.
    if (OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      auto *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //  __kmpc_cancel_barrier();
      //  exit from construct;
      // }
      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // __kmpc_cancel_barrier();
      emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      auto CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// \brief Emits code for the 'cancel' directive, optionally guarded by an
/// 'if' clause condition.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Nothing to do inside a 'single' region.
    if (OMPRegionInfo->getDirectiveKind() == OMPD_single)
      return;
    auto &&ThenGen = [this, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      auto *Result =
          CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //  __kmpc_cancel_barrier();
      //  exit from construct;
      // }
      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // __kmpc_cancel_barrier();
      emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      auto CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    // With an 'if' clause the cancel is conditional; the else branch is empty.
    if (IfCond)
      emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {});
    else
      ThenGen(CGF);
  }
}

/// \brief Outlines the body of a 'target' directive into a separate function
/// generated from its captured statement.
llvm::Value *
CGOpenMPRuntime::emitTargetOutlinedFunction(const OMPExecutableDirective &D,
                                            const RegionCodeGenTy &CodeGen) {
  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(CS);
}

void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Value *OutlinedFn,
                                     const Expr *IfCond, const Expr *Device,
                                     ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;
  /// \brief Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags {
    /// \brief Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// \brief Allocate memory on the device and move data from device to host.
3264 OMP_MAP_FROM = 0x02, 3265 /// \brief The element passed to the device is a pointer. 3266 OMP_MAP_PTR = 0x20, 3267 /// \brief Pass the element to the device by value. 3268 OMP_MAP_BYCOPY = 0x80, 3269 }; 3270 3271 enum OpenMPOffloadingReservedDeviceIDs { 3272 /// \brief Device ID if the device was not defined, runtime should get it 3273 /// from environment variables in the spec. 3274 OMP_DEVICEID_UNDEF = -1, 3275 }; 3276 3277 auto &Ctx = CGF.getContext(); 3278 3279 // Fill up the arrays with the all the captured variables. 3280 SmallVector<llvm::Value *, 16> BasePointers; 3281 SmallVector<llvm::Value *, 16> Pointers; 3282 SmallVector<llvm::Value *, 16> Sizes; 3283 SmallVector<unsigned, 16> MapTypes; 3284 3285 bool hasVLACaptures = false; 3286 3287 const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); 3288 auto RI = CS.getCapturedRecordDecl()->field_begin(); 3289 // auto II = CS.capture_init_begin(); 3290 auto CV = CapturedVars.begin(); 3291 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 3292 CE = CS.capture_end(); 3293 CI != CE; ++CI, ++RI, ++CV) { 3294 StringRef Name; 3295 QualType Ty; 3296 llvm::Value *BasePointer; 3297 llvm::Value *Pointer; 3298 llvm::Value *Size; 3299 unsigned MapType; 3300 3301 // VLA sizes are passed to the outlined region by copy. 3302 if (CI->capturesVariableArrayType()) { 3303 BasePointer = Pointer = *CV; 3304 Size = getTypeSize(CGF, RI->getType()); 3305 // Copy to the device as an argument. No need to retrieve it. 3306 MapType = OMP_MAP_BYCOPY; 3307 hasVLACaptures = true; 3308 } else if (CI->capturesThis()) { 3309 BasePointer = Pointer = *CV; 3310 const PointerType *PtrTy = cast<PointerType>(RI->getType().getTypePtr()); 3311 Size = getTypeSize(CGF, PtrTy->getPointeeType()); 3312 // Default map type. 
3313 MapType = OMP_MAP_TO | OMP_MAP_FROM; 3314 } else if (CI->capturesVariableByCopy()) { 3315 MapType = OMP_MAP_BYCOPY; 3316 if (!RI->getType()->isAnyPointerType()) { 3317 // If the field is not a pointer, we need to save the actual value and 3318 // load it as a void pointer. 3319 auto DstAddr = CGF.CreateMemTemp( 3320 Ctx.getUIntPtrType(), 3321 Twine(CI->getCapturedVar()->getName()) + ".casted"); 3322 LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); 3323 3324 auto *SrcAddrVal = CGF.EmitScalarConversion( 3325 DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), 3326 Ctx.getPointerType(RI->getType()), SourceLocation()); 3327 LValue SrcLV = 3328 CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI->getType()); 3329 3330 // Store the value using the source type pointer. 3331 CGF.EmitStoreThroughLValue(RValue::get(*CV), SrcLV); 3332 3333 // Load the value using the destination type pointer. 3334 BasePointer = Pointer = 3335 CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal(); 3336 } else { 3337 MapType |= OMP_MAP_PTR; 3338 BasePointer = Pointer = *CV; 3339 } 3340 Size = getTypeSize(CGF, RI->getType()); 3341 } else { 3342 assert(CI->capturesVariable() && "Expected captured reference."); 3343 BasePointer = Pointer = *CV; 3344 3345 const ReferenceType *PtrTy = 3346 cast<ReferenceType>(RI->getType().getTypePtr()); 3347 QualType ElementType = PtrTy->getPointeeType(); 3348 Size = getTypeSize(CGF, ElementType); 3349 // The default map type for a scalar/complex type is 'to' because by 3350 // default the value doesn't have to be retrieved. For an aggregate type, 3351 // the default is 'tofrom'. 3352 MapType = ElementType->isAggregateType() ? 
(OMP_MAP_TO | OMP_MAP_FROM) 3353 : OMP_MAP_TO; 3354 if (ElementType->isAnyPointerType()) 3355 MapType |= OMP_MAP_PTR; 3356 } 3357 3358 BasePointers.push_back(BasePointer); 3359 Pointers.push_back(Pointer); 3360 Sizes.push_back(Size); 3361 MapTypes.push_back(MapType); 3362 } 3363 3364 // Keep track on whether the host function has to be executed. 3365 auto OffloadErrorQType = 3366 Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true); 3367 auto OffloadError = CGF.MakeAddrLValue( 3368 CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"), 3369 OffloadErrorQType); 3370 CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), 3371 OffloadError); 3372 3373 // Fill up the pointer arrays and transfer execution to the device. 3374 auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, 3375 hasVLACaptures, Device, OffloadError, 3376 OffloadErrorQType](CodeGenFunction &CGF) { 3377 unsigned PointerNumVal = BasePointers.size(); 3378 llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal); 3379 llvm::Value *BasePointersArray; 3380 llvm::Value *PointersArray; 3381 llvm::Value *SizesArray; 3382 llvm::Value *MapTypesArray; 3383 3384 if (PointerNumVal) { 3385 llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true); 3386 QualType PointerArrayType = Ctx.getConstantArrayType( 3387 Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, 3388 /*IndexTypeQuals=*/0); 3389 3390 BasePointersArray = 3391 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 3392 PointersArray = 3393 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 3394 3395 // If we don't have any VLA types, we can use a constant array for the map 3396 // sizes, otherwise we need to fill up the arrays as we do for the 3397 // pointers. 
3398 if (hasVLACaptures) { 3399 QualType SizeArrayType = Ctx.getConstantArrayType( 3400 Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, 3401 /*IndexTypeQuals=*/0); 3402 SizesArray = 3403 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 3404 } else { 3405 // We expect all the sizes to be constant, so we collect them to create 3406 // a constant array. 3407 SmallVector<llvm::Constant *, 16> ConstSizes; 3408 for (auto S : Sizes) 3409 ConstSizes.push_back(cast<llvm::Constant>(S)); 3410 3411 auto *SizesArrayInit = llvm::ConstantArray::get( 3412 llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); 3413 auto *SizesArrayGbl = new llvm::GlobalVariable( 3414 CGM.getModule(), SizesArrayInit->getType(), 3415 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 3416 SizesArrayInit, ".offload_sizes"); 3417 SizesArrayGbl->setUnnamedAddr(true); 3418 SizesArray = SizesArrayGbl; 3419 } 3420 3421 // The map types are always constant so we don't need to generate code to 3422 // fill arrays. Instead, we create an array constant. 
3423 llvm::Constant *MapTypesArrayInit = 3424 llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); 3425 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 3426 CGM.getModule(), MapTypesArrayInit->getType(), 3427 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 3428 MapTypesArrayInit, ".offload_maptypes"); 3429 MapTypesArrayGbl->setUnnamedAddr(true); 3430 MapTypesArray = MapTypesArrayGbl; 3431 3432 for (unsigned i = 0; i < PointerNumVal; ++i) { 3433 3434 llvm::Value *BPVal = BasePointers[i]; 3435 if (BPVal->getType()->isPointerTy()) 3436 BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy); 3437 else { 3438 assert(BPVal->getType()->isIntegerTy() && 3439 "If not a pointer, the value type must be an integer."); 3440 BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy); 3441 } 3442 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 3443 llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), 3444 BasePointersArray, 0, i); 3445 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 3446 CGF.Builder.CreateStore(BPVal, BPAddr); 3447 3448 llvm::Value *PVal = Pointers[i]; 3449 if (PVal->getType()->isPointerTy()) 3450 PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy); 3451 else { 3452 assert(PVal->getType()->isIntegerTy() && 3453 "If not a pointer, the value type must be an integer."); 3454 PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy); 3455 } 3456 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 3457 llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 3458 0, i); 3459 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 3460 CGF.Builder.CreateStore(PVal, PAddr); 3461 3462 if (hasVLACaptures) { 3463 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 3464 llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, 3465 /*Idx0=*/0, 3466 /*Idx1=*/i); 3467 Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); 3468 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast( 3469 Sizes[i], 
CGM.SizeTy, /*isSigned=*/true), 3470 SAddr); 3471 } 3472 } 3473 3474 BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32( 3475 llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray, 3476 /*Idx0=*/0, /*Idx1=*/0); 3477 PointersArray = CGF.Builder.CreateConstInBoundsGEP2_32( 3478 llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 3479 /*Idx0=*/0, 3480 /*Idx1=*/0); 3481 SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32( 3482 llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, 3483 /*Idx0=*/0, /*Idx1=*/0); 3484 MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32( 3485 llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray, 3486 /*Idx0=*/0, 3487 /*Idx1=*/0); 3488 3489 } else { 3490 BasePointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 3491 PointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 3492 SizesArray = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); 3493 MapTypesArray = 3494 llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()); 3495 } 3496 3497 // On top of the arrays that were filled up, the target offloading call 3498 // takes as arguments the device id as well as the host pointer. The host 3499 // pointer is used by the runtime library to identify the current target 3500 // region, so it only has to be unique and not necessarily point to 3501 // anything. It could be the pointer to the outlined function that 3502 // implements the target region, but we aren't using that so that the 3503 // compiler doesn't need to keep that, and could therefore inline the host 3504 // function if proven worthwhile during optimization. 3505 3506 llvm::Value *HostPtr = new llvm::GlobalVariable( 3507 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 3508 llvm::GlobalValue::PrivateLinkage, 3509 llvm::Constant::getNullValue(CGM.Int8Ty), ".offload_hstptr"); 3510 3511 // Emit device ID if any. 
3512 llvm::Value *DeviceID; 3513 if (Device) 3514 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 3515 CGM.Int32Ty, /*isSigned=*/true); 3516 else 3517 DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); 3518 3519 llvm::Value *OffloadingArgs[] = { 3520 DeviceID, HostPtr, PointerNum, BasePointersArray, 3521 PointersArray, SizesArray, MapTypesArray}; 3522 auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target), 3523 OffloadingArgs); 3524 3525 CGF.EmitStoreOfScalar(Return, OffloadError); 3526 }; 3527 3528 if (IfCond) { 3529 // Notify that the host version must be executed. 3530 auto &&ElseGen = [this, OffloadError, 3531 OffloadErrorQType](CodeGenFunction &CGF) { 3532 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u), 3533 OffloadError); 3534 }; 3535 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 3536 } else { 3537 CodeGenFunction::RunCleanupsScope Scope(CGF); 3538 ThenGen(CGF); 3539 } 3540 3541 // Check the error code and execute the host version if required. 3542 auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed"); 3543 auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont"); 3544 auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation()); 3545 auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal); 3546 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 3547 3548 CGF.EmitBlock(OffloadFailedBlock); 3549 CGF.Builder.CreateCall(OutlinedFn, BasePointers); 3550 CGF.EmitBranch(OffloadContBlock); 3551 3552 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 3553 return; 3554 } 3555