1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for OpenMP runtime code generation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGOpenMPRuntime.h" 15 #include "CodeGenFunction.h" 16 #include "CGCleanup.h" 17 #include "clang/AST/Decl.h" 18 #include "clang/AST/StmtOpenMP.h" 19 #include "llvm/ADT/ArrayRef.h" 20 #include "llvm/IR/CallSite.h" 21 #include "llvm/IR/DerivedTypes.h" 22 #include "llvm/IR/GlobalValue.h" 23 #include "llvm/IR/Value.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include <cassert> 26 27 using namespace clang; 28 using namespace CodeGen; 29 30 namespace { 31 /// \brief Base class for handling code generation inside OpenMP regions. 32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 33 public: 34 /// \brief Kinds of OpenMP regions used in codegen. 35 enum CGOpenMPRegionKind { 36 /// \brief Region with outlined function for standalone 'parallel' 37 /// directive. 38 ParallelOutlinedRegion, 39 /// \brief Region with outlined function for standalone 'task' directive. 40 TaskOutlinedRegion, 41 /// \brief Region for constructs that do not require function outlining, 42 /// like 'for', 'sections', 'atomic' etc. directives. 43 InlinedRegion, 44 }; 45 46 CGOpenMPRegionInfo(const CapturedStmt &CS, 47 const CGOpenMPRegionKind RegionKind, 48 const RegionCodeGenTy &CodeGen) 49 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 50 CodeGen(CodeGen) {} 51 52 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 53 const RegionCodeGenTy &CodeGen) 54 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), 55 CodeGen(CodeGen) {} 56 57 /// \brief Get a variable or parameter for storing global thread id 58 /// inside OpenMP construct. 59 virtual const VarDecl *getThreadIDVariable() const = 0; 60 61 /// \brief Emit the captured statement body. 62 virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 63 64 /// \brief Get an LValue for the current ThreadID variable. 65 /// \return LValue for thread id variable. This LValue always has type int32*. 66 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 67 68 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 69 70 static bool classof(const CGCapturedStmtInfo *Info) { 71 return Info->getKind() == CR_OpenMP; 72 } 73 74 protected: 75 CGOpenMPRegionKind RegionKind; 76 const RegionCodeGenTy &CodeGen; 77 }; 78 79 /// \brief API for captured statement code generation in OpenMP constructs. 80 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo { 81 public: 82 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 83 const RegionCodeGenTy &CodeGen) 84 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen), 85 ThreadIDVar(ThreadIDVar) { 86 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 87 } 88 /// \brief Get a variable or parameter for storing global thread id 89 /// inside OpenMP construct. 90 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 91 92 /// \brief Get the name of the capture helper. 93 StringRef getHelperName() const override { return ".omp_outlined."; } 94 95 static bool classof(const CGCapturedStmtInfo *Info) { 96 return CGOpenMPRegionInfo::classof(Info) && 97 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 98 ParallelOutlinedRegion; 99 } 100 101 private: 102 /// \brief A variable or parameter storing global thread id for OpenMP 103 /// constructs. 104 const VarDecl *ThreadIDVar; 105 }; 106 107 /// \brief API for captured statement code generation in OpenMP constructs. 108 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo { 109 public: 110 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 111 const VarDecl *ThreadIDVar, 112 const RegionCodeGenTy &CodeGen) 113 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen), 114 ThreadIDVar(ThreadIDVar) { 115 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 116 } 117 /// \brief Get a variable or parameter for storing global thread id 118 /// inside OpenMP construct. 119 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 120 121 /// \brief Get an LValue for the current ThreadID variable. 122 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 123 124 /// \brief Get the name of the capture helper. 125 StringRef getHelperName() const override { return ".omp_outlined."; } 126 127 static bool classof(const CGCapturedStmtInfo *Info) { 128 return CGOpenMPRegionInfo::classof(Info) && 129 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 130 TaskOutlinedRegion; 131 } 132 133 private: 134 /// \brief A variable or parameter storing global thread id for OpenMP 135 /// constructs. 136 const VarDecl *ThreadIDVar; 137 }; 138 139 /// \brief API for inlined captured statement code generation in OpenMP 140 /// constructs. 141 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 142 public: 143 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 144 const RegionCodeGenTy &CodeGen) 145 : CGOpenMPRegionInfo(InlinedRegion, CodeGen), OldCSI(OldCSI), 146 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 147 // \brief Retrieve the value of the context parameter. 148 llvm::Value *getContextValue() const override { 149 if (OuterRegionInfo) 150 return OuterRegionInfo->getContextValue(); 151 llvm_unreachable("No context value for inlined OpenMP region"); 152 } 153 virtual void setContextValue(llvm::Value *V) override { 154 if (OuterRegionInfo) { 155 OuterRegionInfo->setContextValue(V); 156 return; 157 } 158 llvm_unreachable("No context value for inlined OpenMP region"); 159 } 160 /// \brief Lookup the captured field decl for a variable. 161 const FieldDecl *lookup(const VarDecl *VD) const override { 162 if (OuterRegionInfo) 163 return OuterRegionInfo->lookup(VD); 164 // If there is no outer outlined region,no need to lookup in a list of 165 // captured variables, we can use the original one. 166 return nullptr; 167 } 168 FieldDecl *getThisFieldDecl() const override { 169 if (OuterRegionInfo) 170 return OuterRegionInfo->getThisFieldDecl(); 171 return nullptr; 172 } 173 /// \brief Get a variable or parameter for storing global thread id 174 /// inside OpenMP construct. 175 const VarDecl *getThreadIDVariable() const override { 176 if (OuterRegionInfo) 177 return OuterRegionInfo->getThreadIDVariable(); 178 return nullptr; 179 } 180 181 /// \brief Get the name of the capture helper. 182 StringRef getHelperName() const override { 183 if (auto *OuterRegionInfo = getOldCSI()) 184 return OuterRegionInfo->getHelperName(); 185 llvm_unreachable("No helper name for inlined OpenMP construct"); 186 } 187 188 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 189 190 static bool classof(const CGCapturedStmtInfo *Info) { 191 return CGOpenMPRegionInfo::classof(Info) && 192 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 193 } 194 195 private: 196 /// \brief CodeGen info about outer OpenMP region. 197 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 198 CGOpenMPRegionInfo *OuterRegionInfo; 199 }; 200 201 /// \brief RAII for emitting code of OpenMP constructs. 202 class InlinedOpenMPRegionRAII { 203 CodeGenFunction &CGF; 204 205 public: 206 /// \brief Constructs region for combined constructs. 207 /// \param CodeGen Code generation sequence for combined directives. Includes 208 /// a list of functions used for code generation of implicitly inlined 209 /// regions. 210 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen) 211 : CGF(CGF) { 212 // Start emission for the construct. 213 CGF.CapturedStmtInfo = 214 new CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, CodeGen); 215 } 216 ~InlinedOpenMPRegionRAII() { 217 // Restore original CapturedStmtInfo only if we're done with code emission. 218 auto *OldCSI = 219 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 220 delete CGF.CapturedStmtInfo; 221 CGF.CapturedStmtInfo = OldCSI; 222 } 223 }; 224 225 } // namespace 226 227 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 228 return CGF.MakeNaturalAlignAddrLValue( 229 CGF.Builder.CreateAlignedLoad( 230 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 231 CGF.PointerAlignInBytes), 232 getThreadIDVariable() 233 ->getType() 234 ->castAs<PointerType>() 235 ->getPointeeType()); 236 } 237 238 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 239 // 1.2.2 OpenMP Language Terminology 240 // Structured block - An executable statement with a single entry at the 241 // top and a single exit at the bottom. 242 // The point of exit cannot be a branch out of the structured block. 243 // longjmp() and throw() must not violate the entry/exit criteria. 244 CGF.EHStack.pushTerminate(); 245 { 246 CodeGenFunction::RunCleanupsScope Scope(CGF); 247 CodeGen(CGF); 248 } 249 CGF.EHStack.popTerminate(); 250 } 251 252 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 253 CodeGenFunction &CGF) { 254 return CGF.MakeNaturalAlignAddrLValue( 255 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 256 getThreadIDVariable()->getType()); 257 } 258 259 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) 260 : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) { 261 IdentTy = llvm::StructType::create( 262 "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, 263 CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, 264 CGM.Int8PtrTy /* psource */, nullptr); 265 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 266 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 267 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 268 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 269 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 270 } 271 272 void CGOpenMPRuntime::clear() { 273 InternalVars.clear(); 274 } 275 276 llvm::Value * 277 CGOpenMPRuntime::emitParallelOutlinedFunction(const OMPExecutableDirective &D, 278 const VarDecl *ThreadIDVar, 279 const RegionCodeGenTy &CodeGen) { 280 assert(ThreadIDVar->getType()->isPointerType() && 281 "thread id variable must be of type kmp_int32 *"); 282 const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 283 CodeGenFunction CGF(CGM, true); 284 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen); 285 CGF.CapturedStmtInfo = &CGInfo; 286 return CGF.GenerateCapturedStmtFunction(*CS); 287 } 288 289 llvm::Value * 290 CGOpenMPRuntime::emitTaskOutlinedFunction(const OMPExecutableDirective &D, 291 const VarDecl *ThreadIDVar, 292 const RegionCodeGenTy &CodeGen) { 293 assert(!ThreadIDVar->getType()->isPointerType() && 294 "thread id variable must be of type kmp_int32 for tasks"); 295 auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 296 CodeGenFunction CGF(CGM, true); 297 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen); 298 CGF.CapturedStmtInfo = &CGInfo; 299 return CGF.GenerateCapturedStmtFunction(*CS); 300 } 301 302 llvm::Value * 303 CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { 304 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); 305 if (!Entry) { 306 if (!DefaultOpenMPPSource) { 307 // Initialize default location for psource field of ident_t structure of 308 // all ident_t objects. Format is ";file;function;line;column;;". 309 // Taken from 310 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c 311 DefaultOpenMPPSource = 312 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;"); 313 DefaultOpenMPPSource = 314 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 315 } 316 auto DefaultOpenMPLocation = new llvm::GlobalVariable( 317 CGM.getModule(), IdentTy, /*isConstant*/ true, 318 llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr); 319 DefaultOpenMPLocation->setUnnamedAddr(true); 320 321 llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true); 322 llvm::Constant *Values[] = {Zero, 323 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 324 Zero, Zero, DefaultOpenMPPSource}; 325 llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values); 326 DefaultOpenMPLocation->setInitializer(Init); 327 OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation; 328 return DefaultOpenMPLocation; 329 } 330 return Entry; 331 } 332 333 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 334 SourceLocation Loc, 335 OpenMPLocationFlags Flags) { 336 // If no debug info is generated - return global default location. 337 if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo || 338 Loc.isInvalid()) 339 return getOrCreateDefaultLocation(Flags); 340 341 assert(CGF.CurFn && "No function in current CodeGenFunction."); 342 343 llvm::Value *LocValue = nullptr; 344 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 345 if (I != OpenMPLocThreadIDMap.end()) 346 LocValue = I->second.DebugLoc; 347 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 348 // GetOpenMPThreadID was called before this routine. 349 if (LocValue == nullptr) { 350 // Generate "ident_t .kmpc_loc.addr;" 351 llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr"); 352 AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy)); 353 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 354 Elem.second.DebugLoc = AI; 355 LocValue = AI; 356 357 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 358 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 359 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 360 llvm::ConstantExpr::getSizeOf(IdentTy), 361 CGM.PointerAlignInBytes); 362 } 363 364 // char **psource = &.kmpc_loc_<flags>.addr.psource; 365 auto *PSource = CGF.Builder.CreateConstInBoundsGEP2_32(IdentTy, LocValue, 0, 366 IdentField_PSource); 367 368 auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 369 if (OMPDebugLoc == nullptr) { 370 SmallString<128> Buffer2; 371 llvm::raw_svector_ostream OS2(Buffer2); 372 // Build debug location 373 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 374 OS2 << ";" << PLoc.getFilename() << ";"; 375 if (const FunctionDecl *FD = 376 dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) { 377 OS2 << FD->getQualifiedNameAsString(); 378 } 379 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 380 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 381 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 382 } 383 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 384 CGF.Builder.CreateStore(OMPDebugLoc, PSource); 385 386 return LocValue; 387 } 388 389 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 390 SourceLocation Loc) { 391 assert(CGF.CurFn && "No function in current CodeGenFunction."); 392 393 llvm::Value *ThreadID = nullptr; 394 // Check whether we've already cached a load of the thread id in this 395 // function. 396 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 397 if (I != OpenMPLocThreadIDMap.end()) { 398 ThreadID = I->second.ThreadID; 399 if (ThreadID != nullptr) 400 return ThreadID; 401 } 402 if (auto OMPRegionInfo = 403 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 404 if (OMPRegionInfo->getThreadIDVariable()) { 405 // Check if this an outlined function with thread id passed as argument. 406 auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 407 ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); 408 // If value loaded in entry block, cache it and use it everywhere in 409 // function. 410 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 411 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 412 Elem.second.ThreadID = ThreadID; 413 } 414 return ThreadID; 415 } 416 } 417 418 // This is not an outlined function region - need to call __kmpc_int32 419 // kmpc_global_thread_num(ident_t *loc). 420 // Generate thread id value and cache this value for use across the 421 // function. 422 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 423 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 424 ThreadID = 425 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 426 emitUpdateLocation(CGF, Loc)); 427 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 428 Elem.second.ThreadID = ThreadID; 429 return ThreadID; 430 } 431 432 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 433 assert(CGF.CurFn && "No function in current CodeGenFunction."); 434 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 435 OpenMPLocThreadIDMap.erase(CGF.CurFn); 436 } 437 438 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 439 return llvm::PointerType::getUnqual(IdentTy); 440 } 441 442 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 443 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 444 } 445 446 llvm::Constant * 447 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { 448 llvm::Constant *RTLFn = nullptr; 449 switch (Function) { 450 case OMPRTL__kmpc_fork_call: { 451 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 452 // microtask, ...); 453 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 454 getKmpc_MicroPointerTy()}; 455 llvm::FunctionType *FnTy = 456 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 457 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 458 break; 459 } 460 case OMPRTL__kmpc_global_thread_num: { 461 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 462 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 463 llvm::FunctionType *FnTy = 464 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 465 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 466 break; 467 } 468 case OMPRTL__kmpc_threadprivate_cached: { 469 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 470 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 471 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 472 CGM.VoidPtrTy, CGM.SizeTy, 473 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 474 llvm::FunctionType *FnTy = 475 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 476 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 477 break; 478 } 479 case OMPRTL__kmpc_critical: { 480 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 481 // kmp_critical_name *crit); 482 llvm::Type *TypeParams[] = { 483 getIdentTyPointerTy(), CGM.Int32Ty, 484 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 485 llvm::FunctionType *FnTy = 486 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 487 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 488 break; 489 } 490 case OMPRTL__kmpc_threadprivate_register: { 491 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 492 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 493 // typedef void *(*kmpc_ctor)(void *); 494 auto KmpcCtorTy = 495 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 496 /*isVarArg*/ false)->getPointerTo(); 497 // typedef void *(*kmpc_cctor)(void *, void *); 498 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 499 auto KmpcCopyCtorTy = 500 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 501 /*isVarArg*/ false)->getPointerTo(); 502 // typedef void (*kmpc_dtor)(void *); 503 auto KmpcDtorTy = 504 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 505 ->getPointerTo(); 506 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 507 KmpcCopyCtorTy, KmpcDtorTy}; 508 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 509 /*isVarArg*/ false); 510 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 511 break; 512 } 513 case OMPRTL__kmpc_end_critical: { 514 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 515 // kmp_critical_name *crit); 516 llvm::Type *TypeParams[] = { 517 getIdentTyPointerTy(), CGM.Int32Ty, 518 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 519 llvm::FunctionType *FnTy = 520 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 521 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 522 break; 523 } 524 case OMPRTL__kmpc_cancel_barrier: { 525 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 526 // global_tid); 527 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 528 llvm::FunctionType *FnTy = 529 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 530 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 531 break; 532 } 533 case OMPRTL__kmpc_for_static_fini: { 534 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 535 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 536 llvm::FunctionType *FnTy = 537 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 538 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 539 break; 540 } 541 case OMPRTL__kmpc_push_num_threads: { 542 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 543 // kmp_int32 num_threads) 544 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 545 CGM.Int32Ty}; 546 llvm::FunctionType *FnTy = 547 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 548 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 549 break; 550 } 551 case OMPRTL__kmpc_serialized_parallel: { 552 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 553 // global_tid); 554 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 555 llvm::FunctionType *FnTy = 556 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 557 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 558 break; 559 } 560 case OMPRTL__kmpc_end_serialized_parallel: { 561 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 562 // global_tid); 563 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 564 llvm::FunctionType *FnTy = 565 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 566 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 567 break; 568 } 569 case OMPRTL__kmpc_flush: { 570 // Build void __kmpc_flush(ident_t *loc); 571 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 572 llvm::FunctionType *FnTy = 573 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 574 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 575 break; 576 } 577 case OMPRTL__kmpc_master: { 578 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 579 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 580 llvm::FunctionType *FnTy = 581 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 582 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 583 break; 584 } 585 case OMPRTL__kmpc_end_master: { 586 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 587 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 588 llvm::FunctionType *FnTy = 589 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 590 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 591 break; 592 } 593 case OMPRTL__kmpc_omp_taskyield: { 594 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 595 // int end_part); 596 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 597 llvm::FunctionType *FnTy = 598 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 599 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 600 break; 601 } 602 case OMPRTL__kmpc_single: { 603 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 604 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 605 llvm::FunctionType *FnTy = 606 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 607 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 608 break; 609 } 610 case OMPRTL__kmpc_end_single: { 611 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 612 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 613 llvm::FunctionType *FnTy = 614 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 615 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 616 break; 617 } 618 case OMPRTL__kmpc_omp_task_alloc: { 619 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 620 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 621 // kmp_routine_entry_t *task_entry); 622 assert(KmpRoutineEntryPtrTy != nullptr && 623 "Type kmp_routine_entry_t must be created."); 624 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 625 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 626 // Return void * and then cast to particular kmp_task_t type. 627 llvm::FunctionType *FnTy = 628 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 629 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 630 break; 631 } 632 case OMPRTL__kmpc_omp_task: { 633 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 634 // *new_task); 635 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 636 CGM.VoidPtrTy}; 637 llvm::FunctionType *FnTy = 638 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 639 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 640 break; 641 } 642 case OMPRTL__kmpc_copyprivate: { 643 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 644 // kmp_int32 cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 645 // kmp_int32 didit); 646 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 647 auto *CpyFnTy = 648 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 649 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 650 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 651 CGM.Int32Ty}; 652 llvm::FunctionType *FnTy = 653 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 654 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 655 break; 656 } 657 case OMPRTL__kmpc_reduce: { 658 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 659 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 660 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 661 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 662 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 663 /*isVarArg=*/false); 664 llvm::Type *TypeParams[] = { 665 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 666 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 667 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 668 llvm::FunctionType *FnTy = 669 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 670 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 671 break; 672 } 673 case OMPRTL__kmpc_reduce_nowait: { 674 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 675 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 676 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 677 // *lck); 678 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 679 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 680 /*isVarArg=*/false); 681 llvm::Type *TypeParams[] = { 682 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 683 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 684 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 685 llvm::FunctionType *FnTy = 686 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 687 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 688 break; 689 } 690 case OMPRTL__kmpc_end_reduce: { 691 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 692 // kmp_critical_name *lck); 693 llvm::Type *TypeParams[] = { 694 getIdentTyPointerTy(), CGM.Int32Ty, 695 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 696 llvm::FunctionType *FnTy = 697 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 698 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 699 break; 700 } 701 case OMPRTL__kmpc_end_reduce_nowait: { 702 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 703 // kmp_critical_name *lck); 704 llvm::Type *TypeParams[] = { 705 getIdentTyPointerTy(), CGM.Int32Ty, 706 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 707 llvm::FunctionType *FnTy = 708 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 709 RTLFn = 710 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 711 break; 712 } 713 } 714 return RTLFn; 715 } 716 717 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, 718 bool IVSigned) { 719 assert((IVSize == 32 || IVSize == 64) && 720 "IV size is not compatible with the omp runtime"); 721 auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 722 : "__kmpc_for_static_init_4u") 723 : (IVSigned ? "__kmpc_for_static_init_8" 724 : "__kmpc_for_static_init_8u"); 725 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 726 auto PtrTy = llvm::PointerType::getUnqual(ITy); 727 llvm::Type *TypeParams[] = { 728 getIdentTyPointerTy(), // loc 729 CGM.Int32Ty, // tid 730 CGM.Int32Ty, // schedtype 731 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 732 PtrTy, // p_lower 733 PtrTy, // p_upper 734 PtrTy, // p_stride 735 ITy, // incr 736 ITy // chunk 737 }; 738 llvm::FunctionType *FnTy = 739 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 740 return CGM.CreateRuntimeFunction(FnTy, Name); 741 } 742 743 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, 744 bool IVSigned) { 745 assert((IVSize == 32 || IVSize == 64) && 746 "IV size is not compatible with the omp runtime"); 747 auto Name = 748 IVSize == 32 749 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 750 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 751 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 752 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 753 CGM.Int32Ty, // tid 754 CGM.Int32Ty, // schedtype 755 ITy, // lower 756 ITy, // upper 757 ITy, // stride 758 ITy // chunk 759 }; 760 llvm::FunctionType *FnTy = 761 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 762 return CGM.CreateRuntimeFunction(FnTy, Name); 763 } 764 765 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, 766 bool IVSigned) { 767 assert((IVSize == 32 || IVSize == 64) && 768 "IV size is not compatible with the omp runtime"); 769 auto Name = 770 IVSize == 32 771 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 772 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 773 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 774 auto PtrTy = llvm::PointerType::getUnqual(ITy); 775 llvm::Type *TypeParams[] = { 776 getIdentTyPointerTy(), // loc 777 CGM.Int32Ty, // tid 778 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 779 PtrTy, // p_lower 780 PtrTy, // p_upper 781 PtrTy // p_stride 782 }; 783 llvm::FunctionType *FnTy = 784 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 785 return CGM.CreateRuntimeFunction(FnTy, Name); 786 } 787 788 llvm::Constant * 789 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 790 // Lookup the entry, lazily creating it if necessary. 791 return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, 792 Twine(CGM.getMangledName(VD)) + ".cache."); 793 } 794 795 llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 796 const VarDecl *VD, 797 llvm::Value *VDAddr, 798 SourceLocation Loc) { 799 auto VarTy = VDAddr->getType()->getPointerElementType(); 800 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 801 CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy), 802 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 803 getOrCreateThreadPrivateCache(VD)}; 804 return CGF.EmitRuntimeCall( 805 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args); 806 } 807 808 void CGOpenMPRuntime::emitThreadPrivateVarInit( 809 CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor, 810 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 811 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 812 // library. 813 auto OMPLoc = emitUpdateLocation(CGF, Loc); 814 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 815 OMPLoc); 816 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 817 // to register constructor/destructor for variable. 818 llvm::Value *Args[] = {OMPLoc, 819 CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy), 820 Ctor, CopyCtor, Dtor}; 821 CGF.EmitRuntimeCall( 822 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 823 } 824 825 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 826 const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc, 827 bool PerformInit, CodeGenFunction *CGF) { 828 VD = VD->getDefinition(CGM.getContext()); 829 if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { 830 ThreadPrivateWithDefinition.insert(VD); 831 QualType ASTTy = VD->getType(); 832 833 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 834 auto Init = VD->getAnyInitializer(); 835 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 836 // Generate function that re-emits the declaration's initializer into the 837 // threadprivate copy of the variable VD 838 CodeGenFunction CtorCGF(CGM); 839 FunctionArgList Args; 840 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 841 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 842 Args.push_back(&Dst); 843 844 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 845 CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(), 846 /*isVariadic=*/false); 847 auto FTy = CGM.getTypes().GetFunctionType(FI); 848 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 849 FTy, ".__kmpc_global_ctor_.", Loc); 850 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 851 Args, SourceLocation()); 852 auto ArgVal = CtorCGF.EmitLoadOfScalar( 853 CtorCGF.GetAddrOfLocalVar(&Dst), 854 /*Volatile=*/false, CGM.PointerAlignInBytes, 855 CGM.getContext().VoidPtrTy, Dst.getLocation()); 856 auto Arg = CtorCGF.Builder.CreatePointerCast( 857 ArgVal, 858 CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy))); 859 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 860 /*IsInitializer=*/true); 861 ArgVal = CtorCGF.EmitLoadOfScalar( 862 CtorCGF.GetAddrOfLocalVar(&Dst), 863 /*Volatile=*/false, CGM.PointerAlignInBytes, 864 CGM.getContext().VoidPtrTy, Dst.getLocation()); 865 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 866 CtorCGF.FinishFunction(); 867 Ctor = Fn; 868 } 869 if (VD->getType().isDestructedType() != QualType::DK_none) { 870 // Generate function that emits destructor call for the threadprivate copy 871 // of the variable VD 872 CodeGenFunction DtorCGF(CGM); 873 FunctionArgList Args; 874 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 875 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 876 Args.push_back(&Dst); 877 878 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 879 CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(), 880 /*isVariadic=*/false); 881 auto FTy = CGM.getTypes().GetFunctionType(FI); 882 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 883 FTy, ".__kmpc_global_dtor_.", Loc); 884 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 885 SourceLocation()); 886 auto ArgVal = DtorCGF.EmitLoadOfScalar( 887 DtorCGF.GetAddrOfLocalVar(&Dst), 888 /*Volatile=*/false, CGM.PointerAlignInBytes, 889 CGM.getContext().VoidPtrTy, Dst.getLocation()); 890 DtorCGF.emitDestroy(ArgVal, ASTTy, 891 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 892 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 893 DtorCGF.FinishFunction(); 894 Dtor = Fn; 895 } 896 // Do not emit init function if it is not required. 897 if (!Ctor && !Dtor) 898 return nullptr; 899 900 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 901 auto CopyCtorTy = 902 llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 903 /*isVarArg=*/false)->getPointerTo(); 904 // Copying constructor for the threadprivate variable. 905 // Must be NULL - reserved by runtime, but currently it requires that this 906 // parameter is always NULL. Otherwise it fires assertion. 907 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 908 if (Ctor == nullptr) { 909 auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 910 /*isVarArg=*/false)->getPointerTo(); 911 Ctor = llvm::Constant::getNullValue(CtorTy); 912 } 913 if (Dtor == nullptr) { 914 auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 915 /*isVarArg=*/false)->getPointerTo(); 916 Dtor = llvm::Constant::getNullValue(DtorTy); 917 } 918 if (!CGF) { 919 auto InitFunctionTy = 920 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 921 auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( 922 InitFunctionTy, ".__omp_threadprivate_init_."); 923 CodeGenFunction InitCGF(CGM); 924 FunctionArgList ArgList; 925 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 926 CGM.getTypes().arrangeNullaryFunction(), ArgList, 927 Loc); 928 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 929 InitCGF.FinishFunction(); 930 return InitFunction; 931 } 932 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 933 } 934 return nullptr; 935 } 936 937 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 938 llvm::Value *OutlinedFn, 939 llvm::Value *CapturedStruct) { 940 // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/) 941 llvm::Value *Args[] = { 942 emitUpdateLocation(CGF, Loc), 943 CGF.Builder.getInt32(1), // Number of arguments after 'microtask' argument 944 // (there is only one additional argument - 'context') 945 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()), 946 CGF.EmitCastToVoidPtr(CapturedStruct)}; 947 auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call); 948 CGF.EmitRuntimeCall(RTLFn, Args); 949 } 950 951 void CGOpenMPRuntime::emitSerialCall(CodeGenFunction &CGF, SourceLocation Loc, 952 llvm::Value *OutlinedFn, 953 llvm::Value *CapturedStruct) { 954 auto ThreadID = getThreadID(CGF, Loc); 955 // Build calls: 956 // __kmpc_serialized_parallel(&Loc, GTid); 957 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), ThreadID}; 958 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), 959 Args); 960 961 // OutlinedFn(>id, &zero, CapturedStruct); 962 auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc); 963 auto Int32Ty = 964 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 965 auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr"); 966 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 967 llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct}; 968 CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); 969 970 // __kmpc_end_serialized_parallel(&Loc, GTid); 971 llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID}; 972 CGF.EmitRuntimeCall( 973 createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs); 974 } 975 976 // If we're inside an (outlined) parallel region, use the region info's 977 // thread-ID variable (it is passed in a first argument of the outlined function 978 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 979 // regular serial code region, get thread ID by calling kmp_int32 980 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 981 // return the address of that temp. 982 llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 983 SourceLocation Loc) { 984 if (auto OMPRegionInfo = 985 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 986 if (OMPRegionInfo->getThreadIDVariable()) 987 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 988 989 auto ThreadID = getThreadID(CGF, Loc); 990 auto Int32Ty = 991 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 992 auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 993 CGF.EmitStoreOfScalar(ThreadID, 994 CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty)); 995 996 return ThreadIDTemp; 997 } 998 999 llvm::Constant * 1000 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, 1001 const llvm::Twine &Name) { 1002 SmallString<256> Buffer; 1003 llvm::raw_svector_ostream Out(Buffer); 1004 Out << Name; 1005 auto RuntimeName = Out.str(); 1006 auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; 1007 if (Elem.second) { 1008 assert(Elem.second->getType()->getPointerElementType() == Ty && 1009 "OMP internal variable has different type than requested"); 1010 return &*Elem.second; 1011 } 1012 1013 return Elem.second = new llvm::GlobalVariable( 1014 CGM.getModule(), Ty, /*IsConstant*/ false, 1015 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 1016 Elem.first()); 1017 } 1018 1019 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 1020 llvm::Twine Name(".gomp_critical_user_", CriticalName); 1021 return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); 1022 } 1023 1024 namespace { 1025 class CallEndCleanup : public EHScopeStack::Cleanup { 1026 public: 1027 typedef ArrayRef<llvm::Value *> CleanupValuesTy; 1028 private: 1029 llvm::Value *Callee; 1030 llvm::SmallVector<llvm::Value *, 8> Args; 1031 1032 public: 1033 CallEndCleanup(llvm::Value *Callee, CleanupValuesTy Args) 1034 : Callee(Callee), Args(Args.begin(), Args.end()) {} 1035 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 1036 CGF.EmitRuntimeCall(Callee, Args); 1037 } 1038 }; 1039 } // namespace 1040 1041 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 1042 StringRef CriticalName, 1043 const RegionCodeGenTy &CriticalOpGen, 1044 SourceLocation Loc) { 1045 // __kmpc_critical(ident_t *, gtid, Lock); 1046 // CriticalOpGen(); 1047 // __kmpc_end_critical(ident_t *, gtid, Lock); 1048 // Prepare arguments and build a call to __kmpc_critical 1049 { 1050 CodeGenFunction::RunCleanupsScope Scope(CGF); 1051 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1052 getCriticalRegionLock(CriticalName)}; 1053 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args); 1054 // Build a call to __kmpc_end_critical 1055 CGF.EHStack.pushCleanup<CallEndCleanup>( 1056 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical), 1057 llvm::makeArrayRef(Args)); 1058 emitInlinedDirective(CGF, CriticalOpGen); 1059 } 1060 } 1061 1062 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond, 1063 const RegionCodeGenTy &BodyOpGen) { 1064 llvm::Value *CallBool = CGF.EmitScalarConversion( 1065 IfCond, 1066 CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true), 1067 CGF.getContext().BoolTy); 1068 1069 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 1070 auto *ContBlock = CGF.createBasicBlock("omp_if.end"); 1071 // Generate the branch (If-stmt) 1072 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 1073 CGF.EmitBlock(ThenBlock); 1074 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, BodyOpGen); 1075 // Emit the rest of bblocks/branches 1076 CGF.EmitBranch(ContBlock); 1077 CGF.EmitBlock(ContBlock, true); 1078 } 1079 1080 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 1081 const RegionCodeGenTy &MasterOpGen, 1082 SourceLocation Loc) { 1083 // if(__kmpc_master(ident_t *, gtid)) { 1084 // MasterOpGen(); 1085 // __kmpc_end_master(ident_t *, gtid); 1086 // } 1087 // Prepare arguments and build a call to __kmpc_master 1088 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1089 auto *IsMaster = 1090 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args); 1091 emitIfStmt(CGF, IsMaster, [&](CodeGenFunction &CGF) -> void { 1092 CodeGenFunction::RunCleanupsScope Scope(CGF); 1093 CGF.EHStack.pushCleanup<CallEndCleanup>( 1094 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master), 1095 llvm::makeArrayRef(Args)); 1096 MasterOpGen(CGF); 1097 }); 1098 } 1099 1100 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 1101 SourceLocation Loc) { 1102 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 1103 llvm::Value *Args[] = { 1104 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1105 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 1106 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 1107 } 1108 1109 static llvm::Value *emitCopyprivateCopyFunction( 1110 CodeGenModule &CGM, llvm::Type *ArgsType, 1111 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 1112 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) { 1113 auto &C = CGM.getContext(); 1114 // void copy_func(void *LHSArg, void *RHSArg); 1115 FunctionArgList Args; 1116 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 1117 C.VoidPtrTy); 1118 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 1119 C.VoidPtrTy); 1120 Args.push_back(&LHSArg); 1121 Args.push_back(&RHSArg); 1122 FunctionType::ExtInfo EI; 1123 auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration( 1124 C.VoidTy, Args, EI, /*isVariadic=*/false); 1125 auto *Fn = llvm::Function::Create( 1126 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 1127 ".omp.copyprivate.copy_func", &CGM.getModule()); 1128 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn); 1129 CodeGenFunction CGF(CGM); 1130 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 1131 // Dest = (void*[n])(LHSArg); 1132 // Src = (void*[n])(RHSArg); 1133 auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1134 CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg), 1135 CGF.PointerAlignInBytes), 1136 ArgsType); 1137 auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1138 CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg), 1139 CGF.PointerAlignInBytes), 1140 ArgsType); 1141 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 1142 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 1143 // ... 1144 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 1145 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 1146 auto *DestAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1147 CGF.Builder.CreateAlignedLoad( 1148 CGF.Builder.CreateStructGEP(nullptr, LHS, I), 1149 CGM.PointerAlignInBytes), 1150 CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType()))); 1151 auto *SrcAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1152 CGF.Builder.CreateAlignedLoad( 1153 CGF.Builder.CreateStructGEP(nullptr, RHS, I), 1154 CGM.PointerAlignInBytes), 1155 CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType()))); 1156 CGF.EmitOMPCopy(CGF, CopyprivateVars[I]->getType(), DestAddr, SrcAddr, 1157 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()), 1158 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()), 1159 AssignmentOps[I]); 1160 } 1161 CGF.FinishFunction(); 1162 return Fn; 1163 } 1164 1165 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 1166 const RegionCodeGenTy &SingleOpGen, 1167 SourceLocation Loc, 1168 ArrayRef<const Expr *> CopyprivateVars, 1169 ArrayRef<const Expr *> SrcExprs, 1170 ArrayRef<const Expr *> DstExprs, 1171 ArrayRef<const Expr *> AssignmentOps) { 1172 assert(CopyprivateVars.size() == SrcExprs.size() && 1173 CopyprivateVars.size() == DstExprs.size() && 1174 CopyprivateVars.size() == AssignmentOps.size()); 1175 auto &C = CGM.getContext(); 1176 // int32 did_it = 0; 1177 // if(__kmpc_single(ident_t *, gtid)) { 1178 // SingleOpGen(); 1179 // __kmpc_end_single(ident_t *, gtid); 1180 // did_it = 1; 1181 // } 1182 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 1183 // <copy_func>, did_it); 1184 1185 llvm::AllocaInst *DidIt = nullptr; 1186 if (!CopyprivateVars.empty()) { 1187 // int32 did_it = 0; 1188 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1189 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 1190 CGF.InitTempAlloca(DidIt, CGF.Builder.getInt32(0)); 1191 } 1192 // Prepare arguments and build a call to __kmpc_single 1193 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1194 auto *IsSingle = 1195 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args); 1196 emitIfStmt(CGF, IsSingle, [&](CodeGenFunction &CGF) -> void { 1197 CodeGenFunction::RunCleanupsScope Scope(CGF); 1198 CGF.EHStack.pushCleanup<CallEndCleanup>( 1199 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single), 1200 llvm::makeArrayRef(Args)); 1201 SingleOpGen(CGF); 1202 if (DidIt) { 1203 // did_it = 1; 1204 CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt, 1205 DidIt->getAlignment()); 1206 } 1207 }); 1208 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 1209 // <copy_func>, did_it); 1210 if (DidIt) { 1211 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 1212 auto CopyprivateArrayTy = 1213 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 1214 /*IndexTypeQuals=*/0); 1215 // Create a list of all private variables for copyprivate. 1216 auto *CopyprivateList = 1217 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 1218 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 1219 auto *Elem = CGF.Builder.CreateStructGEP( 1220 CopyprivateList->getAllocatedType(), CopyprivateList, I); 1221 CGF.Builder.CreateAlignedStore( 1222 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1223 CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy), 1224 Elem, CGM.PointerAlignInBytes); 1225 } 1226 // Build function that copies private values from single region to all other 1227 // threads in the corresponding parallel region. 1228 auto *CpyFn = emitCopyprivateCopyFunction( 1229 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 1230 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); 1231 auto *BufSize = CGF.Builder.getInt32( 1232 C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity()); 1233 auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 1234 CGF.VoidPtrTy); 1235 auto *DidItVal = 1236 CGF.Builder.CreateAlignedLoad(DidIt, CGF.PointerAlignInBytes); 1237 llvm::Value *Args[] = { 1238 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 1239 getThreadID(CGF, Loc), // i32 <gtid> 1240 BufSize, // i32 <buf_size> 1241 CL, // void *<copyprivate list> 1242 CpyFn, // void (*) (void *, void *) <copy_func> 1243 DidItVal // i32 did_it 1244 }; 1245 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 1246 } 1247 } 1248 1249 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 1250 OpenMPDirectiveKind Kind) { 1251 // Build call __kmpc_cancel_barrier(loc, thread_id); 1252 OpenMPLocationFlags Flags = OMP_IDENT_KMPC; 1253 if (Kind == OMPD_for) { 1254 Flags = 1255 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR); 1256 } else if (Kind == OMPD_sections) { 1257 Flags = static_cast<OpenMPLocationFlags>(Flags | 1258 OMP_IDENT_BARRIER_IMPL_SECTIONS); 1259 } else if (Kind == OMPD_single) { 1260 Flags = 1261 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE); 1262 } else if (Kind == OMPD_barrier) { 1263 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL); 1264 } else { 1265 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL); 1266 } 1267 // Build call __kmpc_cancel_barrier(loc, thread_id); 1268 // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this 1269 // one provides the same functionality and adds initial support for 1270 // cancellation constructs introduced in OpenMP 4.0. __kmpc_cancel_barrier() 1271 // is provided default by the runtime library so it safe to make such 1272 // replacement. 1273 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 1274 getThreadID(CGF, Loc)}; 1275 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 1276 } 1277 1278 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from 1279 /// the enum sched_type in kmp.h). 1280 enum OpenMPSchedType { 1281 /// \brief Lower bound for default (unordered) versions. 1282 OMP_sch_lower = 32, 1283 OMP_sch_static_chunked = 33, 1284 OMP_sch_static = 34, 1285 OMP_sch_dynamic_chunked = 35, 1286 OMP_sch_guided_chunked = 36, 1287 OMP_sch_runtime = 37, 1288 OMP_sch_auto = 38, 1289 /// \brief Lower bound for 'ordered' versions. 1290 OMP_ord_lower = 64, 1291 /// \brief Lower bound for 'nomerge' versions. 1292 OMP_nm_lower = 160, 1293 }; 1294 1295 /// \brief Map the OpenMP loop schedule to the runtime enumeration. 1296 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 1297 bool Chunked) { 1298 switch (ScheduleKind) { 1299 case OMPC_SCHEDULE_static: 1300 return Chunked ? OMP_sch_static_chunked : OMP_sch_static; 1301 case OMPC_SCHEDULE_dynamic: 1302 return OMP_sch_dynamic_chunked; 1303 case OMPC_SCHEDULE_guided: 1304 return OMP_sch_guided_chunked; 1305 case OMPC_SCHEDULE_auto: 1306 return OMP_sch_auto; 1307 case OMPC_SCHEDULE_runtime: 1308 return OMP_sch_runtime; 1309 case OMPC_SCHEDULE_unknown: 1310 assert(!Chunked && "chunk was specified but schedule kind not known"); 1311 return OMP_sch_static; 1312 } 1313 llvm_unreachable("Unexpected runtime schedule"); 1314 } 1315 1316 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 1317 bool Chunked) const { 1318 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 1319 return Schedule == OMP_sch_static; 1320 } 1321 1322 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 1323 auto Schedule = getRuntimeSchedule(ScheduleKind, /* Chunked */ false); 1324 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 1325 return Schedule != OMP_sch_static; 1326 } 1327 1328 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc, 1329 OpenMPScheduleClauseKind ScheduleKind, 1330 unsigned IVSize, bool IVSigned, 1331 llvm::Value *IL, llvm::Value *LB, 1332 llvm::Value *UB, llvm::Value *ST, 1333 llvm::Value *Chunk) { 1334 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr); 1335 if (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked) { 1336 // Call __kmpc_dispatch_init( 1337 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 1338 // kmp_int[32|64] lower, kmp_int[32|64] upper, 1339 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 1340 1341 // If the Chunk was not specified in the clause - use default value 1. 1342 if (Chunk == nullptr) 1343 Chunk = CGF.Builder.getIntN(IVSize, 1); 1344 llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1345 getThreadID(CGF, Loc), 1346 CGF.Builder.getInt32(Schedule), // Schedule type 1347 CGF.Builder.getIntN(IVSize, 0), // Lower 1348 UB, // Upper 1349 CGF.Builder.getIntN(IVSize, 1), // Stride 1350 Chunk // Chunk 1351 }; 1352 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 1353 } else { 1354 // Call __kmpc_for_static_init( 1355 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 1356 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 1357 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 1358 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 1359 if (Chunk == nullptr) { 1360 assert(Schedule == OMP_sch_static && 1361 "expected static non-chunked schedule"); 1362 // If the Chunk was not specified in the clause - use default value 1. 1363 Chunk = CGF.Builder.getIntN(IVSize, 1); 1364 } else 1365 assert(Schedule == OMP_sch_static_chunked && 1366 "expected static chunked schedule"); 1367 llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1368 getThreadID(CGF, Loc), 1369 CGF.Builder.getInt32(Schedule), // Schedule type 1370 IL, // &isLastIter 1371 LB, // &LB 1372 UB, // &UB 1373 ST, // &Stride 1374 CGF.Builder.getIntN(IVSize, 1), // Incr 1375 Chunk // Chunk 1376 }; 1377 CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args); 1378 } 1379 } 1380 1381 void CGOpenMPRuntime::emitForFinish(CodeGenFunction &CGF, SourceLocation Loc, 1382 OpenMPScheduleClauseKind ScheduleKind) { 1383 assert((ScheduleKind == OMPC_SCHEDULE_static || 1384 ScheduleKind == OMPC_SCHEDULE_unknown) && 1385 "Non-static schedule kinds are not yet implemented"); 1386 (void)ScheduleKind; 1387 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 1388 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1389 getThreadID(CGF, Loc)}; 1390 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 1391 Args); 1392 } 1393 1394 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 1395 SourceLocation Loc, unsigned IVSize, 1396 bool IVSigned, llvm::Value *IL, 1397 llvm::Value *LB, llvm::Value *UB, 1398 llvm::Value *ST) { 1399 // Call __kmpc_dispatch_next( 1400 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 1401 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 1402 // kmp_int[32|64] *p_stride); 1403 llvm::Value *Args[] = { 1404 emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc), 1405 IL, // &isLastIter 1406 LB, // &Lower 1407 UB, // &Upper 1408 ST // &Stride 1409 }; 1410 llvm::Value *Call = 1411 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 1412 return CGF.EmitScalarConversion( 1413 Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true), 1414 CGF.getContext().BoolTy); 1415 } 1416 1417 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 1418 llvm::Value *NumThreads, 1419 SourceLocation Loc) { 1420 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 1421 llvm::Value *Args[] = { 1422 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1423 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 1424 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 1425 Args); 1426 } 1427 1428 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 1429 SourceLocation Loc) { 1430 // Build call void __kmpc_flush(ident_t *loc) 1431 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 1432 emitUpdateLocation(CGF, Loc)); 1433 } 1434 1435 namespace { 1436 /// \brief Indexes of fields for type kmp_task_t. 1437 enum KmpTaskTFields { 1438 /// \brief List of shared variables. 1439 KmpTaskTShareds, 1440 /// \brief Task routine. 1441 KmpTaskTRoutine, 1442 /// \brief Partition id for the untied tasks. 1443 KmpTaskTPartId, 1444 /// \brief Function with call of destructors for private variables. 1445 KmpTaskTDestructors, 1446 }; 1447 } // namespace 1448 1449 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 1450 if (!KmpRoutineEntryPtrTy) { 1451 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 1452 auto &C = CGM.getContext(); 1453 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 1454 FunctionProtoType::ExtProtoInfo EPI; 1455 KmpRoutineEntryPtrQTy = C.getPointerType( 1456 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 1457 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 1458 } 1459 } 1460 1461 static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1462 QualType FieldTy) { 1463 auto *Field = FieldDecl::Create( 1464 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1465 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1466 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1467 Field->setAccess(AS_public); 1468 DC->addDecl(Field); 1469 } 1470 1471 static QualType createKmpTaskTRecordDecl(CodeGenModule &CGM, 1472 QualType KmpInt32Ty, 1473 QualType KmpRoutineEntryPointerQTy) { 1474 auto &C = CGM.getContext(); 1475 // Build struct kmp_task_t { 1476 // void * shareds; 1477 // kmp_routine_entry_t routine; 1478 // kmp_int32 part_id; 1479 // kmp_routine_entry_t destructors; 1480 // /* private vars */ 1481 // }; 1482 auto *RD = C.buildImplicitRecord("kmp_task_t"); 1483 RD->startDefinition(); 1484 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1485 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 1486 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1487 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 1488 // TODO: add private fields. 1489 RD->completeDefinition(); 1490 return C.getRecordType(RD); 1491 } 1492 1493 /// \brief Emit a proxy function which accepts kmp_task_t as the second 1494 /// argument. 1495 /// \code 1496 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 1497 /// TaskFunction(gtid, tt->part_id, tt->shareds); 1498 /// return 0; 1499 /// } 1500 /// \endcode 1501 static llvm::Value * 1502 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 1503 QualType KmpInt32Ty, QualType KmpTaskTPtrQTy, 1504 QualType SharedsPtrTy, llvm::Value *TaskFunction, 1505 llvm::Type *KmpTaskTTy) { 1506 auto &C = CGM.getContext(); 1507 FunctionArgList Args; 1508 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 1509 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, 1510 /*Id=*/nullptr, KmpTaskTPtrQTy); 1511 Args.push_back(&GtidArg); 1512 Args.push_back(&TaskTypeArg); 1513 FunctionType::ExtInfo Info; 1514 auto &TaskEntryFnInfo = 1515 CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info, 1516 /*isVariadic=*/false); 1517 auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 1518 auto *TaskEntry = 1519 llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage, 1520 ".omp_task_entry.", &CGM.getModule()); 1521 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry); 1522 CodeGenFunction CGF(CGM); 1523 CGF.disableDebugInfo(); 1524 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); 1525 1526 // TaskFunction(gtid, tt->part_id, tt->shareds); 1527 auto *GtidParam = CGF.EmitLoadOfScalar( 1528 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, 1529 C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc); 1530 auto TaskTypeArgAddr = CGF.EmitLoadOfScalar( 1531 CGF.GetAddrOfLocalVar(&TaskTypeArg), /*Volatile=*/false, 1532 CGM.PointerAlignInBytes, KmpTaskTPtrQTy, Loc); 1533 auto *PartidPtr = CGF.Builder.CreateStructGEP(KmpTaskTTy, TaskTypeArgAddr, 1534 /*Idx=*/KmpTaskTPartId); 1535 auto *PartidParam = CGF.EmitLoadOfScalar( 1536 PartidPtr, /*Volatile=*/false, 1537 C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc); 1538 auto *SharedsPtr = CGF.Builder.CreateStructGEP(KmpTaskTTy, TaskTypeArgAddr, 1539 /*Idx=*/KmpTaskTShareds); 1540 auto *SharedsParam = 1541 CGF.EmitLoadOfScalar(SharedsPtr, /*Volatile=*/false, 1542 CGM.PointerAlignInBytes, C.VoidPtrTy, Loc); 1543 llvm::Value *CallArgs[] = { 1544 GtidParam, PartidParam, 1545 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1546 SharedsParam, CGF.ConvertTypeForMem(SharedsPtrTy))}; 1547 CGF.EmitCallOrInvoke(TaskFunction, CallArgs); 1548 CGF.EmitStoreThroughLValue( 1549 RValue::get(CGF.Builder.getInt32(/*C=*/0)), 1550 CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 1551 CGF.FinishFunction(); 1552 return TaskEntry; 1553 } 1554 1555 void CGOpenMPRuntime::emitTaskCall( 1556 CodeGenFunction &CGF, SourceLocation Loc, bool Tied, 1557 llvm::PointerIntPair<llvm::Value *, 1, bool> Final, 1558 llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds) { 1559 auto &C = CGM.getContext(); 1560 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1561 // Build type kmp_routine_entry_t (if not built yet). 1562 emitKmpRoutineEntryT(KmpInt32Ty); 1563 // Build particular struct kmp_task_t for the given task. 1564 auto KmpTaskQTy = 1565 createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy); 1566 QualType KmpTaskTPtrQTy = C.getPointerType(KmpTaskQTy); 1567 auto *KmpTaskTTy = CGF.ConvertType(KmpTaskQTy); 1568 auto *KmpTaskTPtrTy = KmpTaskTTy->getPointerTo(); 1569 auto KmpTaskTySize = CGM.getSize(C.getTypeSizeInChars(KmpTaskQTy)); 1570 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 1571 1572 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 1573 // kmp_task_t *tt); 1574 auto *TaskEntry = 1575 emitProxyTaskFunction(CGM, Loc, KmpInt32Ty, KmpTaskTPtrQTy, SharedsPtrTy, 1576 TaskFunction, KmpTaskTTy); 1577 1578 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1579 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1580 // kmp_routine_entry_t *task_entry); 1581 // Task flags. Format is taken from 1582 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, 1583 // description of kmp_tasking_flags struct. 1584 const unsigned TiedFlag = 0x1; 1585 const unsigned FinalFlag = 0x2; 1586 unsigned Flags = Tied ? TiedFlag : 0; 1587 auto *TaskFlags = 1588 Final.getPointer() 1589 ? CGF.Builder.CreateSelect(Final.getPointer(), 1590 CGF.Builder.getInt32(FinalFlag), 1591 CGF.Builder.getInt32(/*C=*/0)) 1592 : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0); 1593 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 1594 auto SharedsSize = C.getTypeSizeInChars(SharedsTy); 1595 llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), 1596 getThreadID(CGF, Loc), TaskFlags, KmpTaskTySize, 1597 CGM.getSize(SharedsSize), 1598 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1599 TaskEntry, KmpRoutineEntryPtrTy)}; 1600 auto *NewTask = CGF.EmitRuntimeCall( 1601 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 1602 auto *NewTaskNewTaskTTy = 1603 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(NewTask, KmpTaskTPtrTy); 1604 // Fill the data in the resulting kmp_task_t record. 1605 // Copy shareds if there are any. 1606 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) 1607 CGF.EmitAggregateCopy( 1608 CGF.EmitLoadOfScalar( 1609 CGF.Builder.CreateStructGEP(KmpTaskTTy, NewTaskNewTaskTTy, 1610 /*Idx=*/KmpTaskTShareds), 1611 /*Volatile=*/false, CGM.PointerAlignInBytes, SharedsPtrTy, Loc), 1612 Shareds, SharedsTy); 1613 // TODO: generate function with destructors for privates. 1614 // Provide pointer to function with destructors for privates. 1615 CGF.Builder.CreateAlignedStore( 1616 llvm::ConstantPointerNull::get( 1617 cast<llvm::PointerType>(KmpRoutineEntryPtrTy)), 1618 CGF.Builder.CreateStructGEP(KmpTaskTTy, NewTaskNewTaskTTy, 1619 /*Idx=*/KmpTaskTDestructors), 1620 CGM.PointerAlignInBytes); 1621 1622 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 1623 // libcall. 1624 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1625 // *new_task); 1626 llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc), 1627 getThreadID(CGF, Loc), NewTask}; 1628 // TODO: add check for untied tasks. 1629 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1630 } 1631 1632 static llvm::Value *emitReductionFunction(CodeGenModule &CGM, 1633 llvm::Type *ArgsType, 1634 ArrayRef<const Expr *> LHSExprs, 1635 ArrayRef<const Expr *> RHSExprs, 1636 ArrayRef<const Expr *> ReductionOps) { 1637 auto &C = CGM.getContext(); 1638 1639 // void reduction_func(void *LHSArg, void *RHSArg); 1640 FunctionArgList Args; 1641 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 1642 C.VoidPtrTy); 1643 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 1644 C.VoidPtrTy); 1645 Args.push_back(&LHSArg); 1646 Args.push_back(&RHSArg); 1647 FunctionType::ExtInfo EI; 1648 auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration( 1649 C.VoidTy, Args, EI, /*isVariadic=*/false); 1650 auto *Fn = llvm::Function::Create( 1651 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 1652 ".omp.reduction.reduction_func", &CGM.getModule()); 1653 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn); 1654 CodeGenFunction CGF(CGM); 1655 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 1656 1657 // Dst = (void*[n])(LHSArg); 1658 // Src = (void*[n])(RHSArg); 1659 auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1660 CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg), 1661 CGF.PointerAlignInBytes), 1662 ArgsType); 1663 auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1664 CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg), 1665 CGF.PointerAlignInBytes), 1666 ArgsType); 1667 1668 // ... 1669 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 1670 // ... 1671 CodeGenFunction::OMPPrivateScope Scope(CGF); 1672 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) { 1673 Scope.addPrivate( 1674 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()), 1675 [&]() -> llvm::Value *{ 1676 return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1677 CGF.Builder.CreateAlignedLoad( 1678 CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, RHS, I), 1679 CGM.PointerAlignInBytes), 1680 CGF.ConvertTypeForMem(C.getPointerType(RHSExprs[I]->getType()))); 1681 }); 1682 Scope.addPrivate( 1683 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()), 1684 [&]() -> llvm::Value *{ 1685 return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1686 CGF.Builder.CreateAlignedLoad( 1687 CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, LHS, I), 1688 CGM.PointerAlignInBytes), 1689 CGF.ConvertTypeForMem(C.getPointerType(LHSExprs[I]->getType()))); 1690 }); 1691 } 1692 Scope.Privatize(); 1693 for (auto *E : ReductionOps) { 1694 CGF.EmitIgnoredExpr(E); 1695 } 1696 Scope.ForceCleanup(); 1697 CGF.FinishFunction(); 1698 return Fn; 1699 } 1700 1701 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 1702 ArrayRef<const Expr *> LHSExprs, 1703 ArrayRef<const Expr *> RHSExprs, 1704 ArrayRef<const Expr *> ReductionOps, 1705 bool WithNowait) { 1706 // Next code should be emitted for reduction: 1707 // 1708 // static kmp_critical_name lock = { 0 }; 1709 // 1710 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 1711 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 1712 // ... 1713 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 1714 // *(Type<n>-1*)rhs[<n>-1]); 1715 // } 1716 // 1717 // ... 1718 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 1719 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 1720 // RedList, reduce_func, &<lock>)) { 1721 // case 1: 1722 // ... 1723 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 1724 // ... 1725 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 1726 // break; 1727 // case 2: 1728 // ... 1729 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 1730 // ... 1731 // break; 1732 // default:; 1733 // } 1734 1735 auto &C = CGM.getContext(); 1736 1737 // 1. Build a list of reduction variables. 1738 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 1739 llvm::APInt ArraySize(/*unsigned int numBits=*/32, RHSExprs.size()); 1740 QualType ReductionArrayTy = 1741 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 1742 /*IndexTypeQuals=*/0); 1743 auto *ReductionList = 1744 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 1745 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) { 1746 auto *Elem = CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, ReductionList, I); 1747 CGF.Builder.CreateAlignedStore( 1748 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1749 CGF.EmitLValue(RHSExprs[I]).getAddress(), CGF.VoidPtrTy), 1750 Elem, CGM.PointerAlignInBytes); 1751 } 1752 1753 // 2. Emit reduce_func(). 1754 auto *ReductionFn = emitReductionFunction( 1755 CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), LHSExprs, 1756 RHSExprs, ReductionOps); 1757 1758 // 3. Create static kmp_critical_name lock = { 0 }; 1759 auto *Lock = getCriticalRegionLock(".reduction"); 1760 1761 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 1762 // RedList, reduce_func, &<lock>); 1763 auto *IdentTLoc = emitUpdateLocation( 1764 CGF, Loc, 1765 static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE)); 1766 auto *ThreadId = getThreadID(CGF, Loc); 1767 auto *ReductionArrayTySize = llvm::ConstantInt::get( 1768 CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity()); 1769 auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, 1770 CGF.VoidPtrTy); 1771 llvm::Value *Args[] = { 1772 IdentTLoc, // ident_t *<loc> 1773 ThreadId, // i32 <gtid> 1774 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 1775 ReductionArrayTySize, // size_type sizeof(RedList) 1776 RL, // void *RedList 1777 ReductionFn, // void (*) (void *, void *) <reduce_func> 1778 Lock // kmp_critical_name *&<lock> 1779 }; 1780 auto Res = CGF.EmitRuntimeCall( 1781 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 1782 : OMPRTL__kmpc_reduce), 1783 Args); 1784 1785 // 5. Build switch(res) 1786 auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 1787 auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 1788 1789 // 6. Build case 1: 1790 // ... 1791 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 1792 // ... 1793 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 1794 // break; 1795 auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 1796 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 1797 CGF.EmitBlock(Case1BB); 1798 1799 { 1800 CodeGenFunction::RunCleanupsScope Scope(CGF); 1801 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 1802 llvm::Value *EndArgs[] = { 1803 IdentTLoc, // ident_t *<loc> 1804 ThreadId, // i32 <gtid> 1805 Lock // kmp_critical_name *&<lock> 1806 }; 1807 CGF.EHStack.pushCleanup<CallEndCleanup>( 1808 NormalAndEHCleanup, 1809 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 1810 : OMPRTL__kmpc_end_reduce), 1811 llvm::makeArrayRef(EndArgs)); 1812 for (auto *E : ReductionOps) { 1813 CGF.EmitIgnoredExpr(E); 1814 } 1815 } 1816 1817 CGF.EmitBranch(DefaultBB); 1818 1819 // 7. Build case 2: 1820 // ... 1821 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 1822 // ... 1823 // break; 1824 auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 1825 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 1826 CGF.EmitBlock(Case2BB); 1827 1828 { 1829 CodeGenFunction::RunCleanupsScope Scope(CGF); 1830 auto I = LHSExprs.begin(); 1831 for (auto *E : ReductionOps) { 1832 const Expr *XExpr = nullptr; 1833 const Expr *EExpr = nullptr; 1834 const Expr *UpExpr = nullptr; 1835 BinaryOperatorKind BO = BO_Comma; 1836 // Try to emit update expression as a simple atomic. 1837 if (auto *ACO = dyn_cast<AbstractConditionalOperator>(E)) { 1838 // If this is a conditional operator, analyze it's condition for 1839 // min/max reduction operator. 1840 E = ACO->getCond(); 1841 } 1842 if (auto *BO = dyn_cast<BinaryOperator>(E)) { 1843 if (BO->getOpcode() == BO_Assign) { 1844 XExpr = BO->getLHS(); 1845 UpExpr = BO->getRHS(); 1846 } 1847 } 1848 // Analyze RHS part of the whole expression. 1849 if (UpExpr) { 1850 if (auto *BORHS = 1851 dyn_cast<BinaryOperator>(UpExpr->IgnoreParenImpCasts())) { 1852 EExpr = BORHS->getRHS(); 1853 BO = BORHS->getOpcode(); 1854 } 1855 } 1856 if (XExpr) { 1857 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 1858 LValue X = CGF.EmitLValue(XExpr); 1859 RValue E; 1860 if (EExpr) 1861 E = CGF.EmitAnyExpr(EExpr); 1862 CGF.EmitOMPAtomicSimpleUpdateExpr( 1863 X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc, 1864 [&CGF, UpExpr, VD](RValue XRValue) { 1865 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 1866 PrivateScope.addPrivate( 1867 VD, [&CGF, VD, XRValue]() -> llvm::Value *{ 1868 auto *LHSTemp = CGF.CreateMemTemp(VD->getType()); 1869 CGF.EmitStoreThroughLValue( 1870 XRValue, 1871 CGF.MakeNaturalAlignAddrLValue(LHSTemp, VD->getType())); 1872 return LHSTemp; 1873 }); 1874 (void)PrivateScope.Privatize(); 1875 return CGF.EmitAnyExpr(UpExpr); 1876 }); 1877 } else { 1878 // Emit as a critical region. 1879 emitCriticalRegion(CGF, ".atomic_reduction", [E](CodeGenFunction &CGF) { 1880 CGF.EmitIgnoredExpr(E); 1881 }, Loc); 1882 } 1883 ++I; 1884 } 1885 } 1886 1887 CGF.EmitBranch(DefaultBB); 1888 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 1889 } 1890 1891 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 1892 const RegionCodeGenTy &CodeGen) { 1893 InlinedOpenMPRegionRAII Region(CGF, CodeGen); 1894 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 1895 } 1896 1897