1 //===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the PassManagerBuilder class, which is used to set up a 11 // "standard" optimization sequence suitable for languages like C and C++. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/IPO/PassManagerBuilder.h" 16 #include "llvm-c/Transforms/PassManagerBuilder.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/Analysis/BasicAliasAnalysis.h" 19 #include "llvm/Analysis/CFLAndersAliasAnalysis.h" 20 #include "llvm/Analysis/CFLSteensAliasAnalysis.h" 21 #include "llvm/Analysis/GlobalsModRef.h" 22 #include "llvm/Analysis/InlineCost.h" 23 #include "llvm/Analysis/Passes.h" 24 #include "llvm/Analysis/ScopedNoAliasAA.h" 25 #include "llvm/Analysis/TargetLibraryInfo.h" 26 #include "llvm/Analysis/TypeBasedAliasAnalysis.h" 27 #include "llvm/IR/DataLayout.h" 28 #include "llvm/IR/LegacyPassManager.h" 29 #include "llvm/IR/Verifier.h" 30 #include "llvm/Support/CommandLine.h" 31 #include "llvm/Support/ManagedStatic.h" 32 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" 33 #include "llvm/Transforms/IPO.h" 34 #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" 35 #include "llvm/Transforms/IPO/FunctionAttrs.h" 36 #include "llvm/Transforms/IPO/InferFunctionAttrs.h" 37 #include "llvm/Transforms/InstCombine/InstCombine.h" 38 #include "llvm/Transforms/Instrumentation.h" 39 #include "llvm/Transforms/Scalar.h" 40 #include "llvm/Transforms/Scalar/GVN.h" 41 #include "llvm/Transforms/Scalar/InstSimplifyPass.h" 42 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" 43 #include "llvm/Transforms/Utils.h" 44 #include "llvm/Transforms/Vectorize.h" 45 46 using namespace llvm; 47 48 static cl::opt<bool> 49 RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, 50 cl::ZeroOrMore, cl::desc("Run Partial inlinining pass")); 51 52 static cl::opt<bool> 53 RunLoopVectorization("vectorize-loops", cl::Hidden, 54 cl::desc("Run the Loop vectorization passes")); 55 56 static cl::opt<bool> 57 RunSLPVectorization("vectorize-slp", cl::Hidden, 58 cl::desc("Run the SLP vectorization passes")); 59 60 static cl::opt<bool> 61 UseGVNAfterVectorization("use-gvn-after-vectorization", 62 cl::init(false), cl::Hidden, 63 cl::desc("Run GVN instead of Early CSE after vectorization passes")); 64 65 static cl::opt<bool> ExtraVectorizerPasses( 66 "extra-vectorizer-passes", cl::init(false), cl::Hidden, 67 cl::desc("Run cleanup optimization passes after vectorization.")); 68 69 static cl::opt<bool> 70 RunLoopRerolling("reroll-loops", cl::Hidden, 71 cl::desc("Run the loop rerolling pass")); 72 73 static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, 74 cl::desc("Run the NewGVN pass")); 75 76 static cl::opt<bool> 77 RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization", 78 cl::init(true), cl::Hidden, 79 cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop " 80 "vectorizer instead of before")); 81 82 // Experimental option to use CFL-AA 83 enum class CFLAAType { None, Steensgaard, Andersen, Both }; 84 static cl::opt<CFLAAType> 85 UseCFLAA("use-cfl-aa", cl::init(CFLAAType::None), cl::Hidden, 86 cl::desc("Enable the new, experimental CFL alias analysis"), 87 cl::values(clEnumValN(CFLAAType::None, "none", "Disable CFL-AA"), 88 clEnumValN(CFLAAType::Steensgaard, "steens", 89 "Enable unification-based CFL-AA"), 90 clEnumValN(CFLAAType::Andersen, "anders", 91 "Enable inclusion-based CFL-AA"), 92 clEnumValN(CFLAAType::Both, "both", 93 "Enable both variants of CFL-AA"))); 94 95 static cl::opt<bool> EnableLoopInterchange( 96 "enable-loopinterchange", cl::init(false), cl::Hidden, 97 cl::desc("Enable the new, experimental LoopInterchange Pass")); 98 99 static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam", 100 cl::init(false), cl::Hidden, 101 cl::desc("Enable Unroll And Jam Pass")); 102 103 static cl::opt<bool> 104 EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden, 105 cl::desc("Enable preparation for ThinLTO.")); 106 107 static cl::opt<bool> RunPGOInstrGen( 108 "profile-generate", cl::init(false), cl::Hidden, 109 cl::desc("Enable PGO instrumentation.")); 110 111 static cl::opt<std::string> 112 PGOOutputFile("profile-generate-file", cl::init(""), cl::Hidden, 113 cl::desc("Specify the path of profile data file.")); 114 115 static cl::opt<std::string> RunPGOInstrUse( 116 "profile-use", cl::init(""), cl::Hidden, cl::value_desc("filename"), 117 cl::desc("Enable use phase of PGO instrumentation and specify the path " 118 "of profile data file")); 119 120 static cl::opt<bool> UseLoopVersioningLICM( 121 "enable-loop-versioning-licm", cl::init(false), cl::Hidden, 122 cl::desc("Enable the experimental Loop Versioning LICM pass")); 123 124 static cl::opt<bool> 125 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, 126 cl::desc("Disable pre-instrumentation inliner")); 127 128 static cl::opt<int> PreInlineThreshold( 129 "preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore, 130 cl::desc("Control the amount of inlining in pre-instrumentation inliner " 131 "(default = 75)")); 132 133 static cl::opt<bool> EnableEarlyCSEMemSSA( 134 "enable-earlycse-memssa", cl::init(true), cl::Hidden, 135 cl::desc("Enable the EarlyCSE w/ MemorySSA pass (default = on)")); 136 137 static cl::opt<bool> EnableGVNHoist( 138 "enable-gvn-hoist", cl::init(false), cl::Hidden, 139 cl::desc("Enable the GVN hoisting pass (default = off)")); 140 141 static cl::opt<bool> 142 DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false), 143 cl::Hidden, 144 cl::desc("Disable shrink-wrap library calls")); 145 146 static cl::opt<bool> EnableSimpleLoopUnswitch( 147 "enable-simple-loop-unswitch", cl::init(false), cl::Hidden, 148 cl::desc("Enable the simple loop unswitch pass. Also enables independent " 149 "cleanup passes integrated into the loop pass manager pipeline.")); 150 151 static cl::opt<bool> EnableGVNSink( 152 "enable-gvn-sink", cl::init(false), cl::Hidden, 153 cl::desc("Enable the GVN sinking pass (default = off)")); 154 155 PassManagerBuilder::PassManagerBuilder() { 156 OptLevel = 2; 157 SizeLevel = 0; 158 LibraryInfo = nullptr; 159 Inliner = nullptr; 160 DisableUnrollLoops = false; 161 SLPVectorize = RunSLPVectorization; 162 LoopVectorize = RunLoopVectorization; 163 RerollLoops = RunLoopRerolling; 164 NewGVN = RunNewGVN; 165 DisableGVNLoadPRE = false; 166 VerifyInput = false; 167 VerifyOutput = false; 168 MergeFunctions = false; 169 PrepareForLTO = false; 170 EnablePGOInstrGen = RunPGOInstrGen; 171 PGOInstrGen = PGOOutputFile; 172 PGOInstrUse = RunPGOInstrUse; 173 PrepareForThinLTO = EnablePrepareForThinLTO; 174 PerformThinLTO = false; 175 DivergentTarget = false; 176 } 177 178 PassManagerBuilder::~PassManagerBuilder() { 179 delete LibraryInfo; 180 delete Inliner; 181 } 182 183 /// Set of global extensions, automatically added as part of the standard set. 184 static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy, 185 PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions; 186 187 /// Check if GlobalExtensions is constructed and not empty. 188 /// Since GlobalExtensions is a managed static, calling 'empty()' will trigger 189 /// the construction of the object. 190 static bool GlobalExtensionsNotEmpty() { 191 return GlobalExtensions.isConstructed() && !GlobalExtensions->empty(); 192 } 193 194 void PassManagerBuilder::addGlobalExtension( 195 PassManagerBuilder::ExtensionPointTy Ty, 196 PassManagerBuilder::ExtensionFn Fn) { 197 GlobalExtensions->push_back(std::make_pair(Ty, std::move(Fn))); 198 } 199 200 void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) { 201 Extensions.push_back(std::make_pair(Ty, std::move(Fn))); 202 } 203 204 void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy, 205 legacy::PassManagerBase &PM) const { 206 if (GlobalExtensionsNotEmpty()) { 207 for (auto &Ext : *GlobalExtensions) { 208 if (Ext.first == ETy) 209 Ext.second(*this, PM); 210 } 211 } 212 for (unsigned i = 0, e = Extensions.size(); i != e; ++i) 213 if (Extensions[i].first == ETy) 214 Extensions[i].second(*this, PM); 215 } 216 217 void PassManagerBuilder::addInitialAliasAnalysisPasses( 218 legacy::PassManagerBase &PM) const { 219 switch (UseCFLAA) { 220 case CFLAAType::Steensgaard: 221 PM.add(createCFLSteensAAWrapperPass()); 222 break; 223 case CFLAAType::Andersen: 224 PM.add(createCFLAndersAAWrapperPass()); 225 break; 226 case CFLAAType::Both: 227 PM.add(createCFLSteensAAWrapperPass()); 228 PM.add(createCFLAndersAAWrapperPass()); 229 break; 230 default: 231 break; 232 } 233 234 // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that 235 // BasicAliasAnalysis wins if they disagree. This is intended to help 236 // support "obvious" type-punning idioms. 237 PM.add(createTypeBasedAAWrapperPass()); 238 PM.add(createScopedNoAliasAAWrapperPass()); 239 } 240 241 void PassManagerBuilder::addInstructionCombiningPass( 242 legacy::PassManagerBase &PM) const { 243 bool ExpensiveCombines = OptLevel > 2; 244 PM.add(createInstructionCombiningPass(ExpensiveCombines)); 245 } 246 247 void PassManagerBuilder::populateFunctionPassManager( 248 legacy::FunctionPassManager &FPM) { 249 addExtensionsToPM(EP_EarlyAsPossible, FPM); 250 FPM.add(createEntryExitInstrumenterPass()); 251 252 // Add LibraryInfo if we have some. 253 if (LibraryInfo) 254 FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); 255 256 if (OptLevel == 0) return; 257 258 addInitialAliasAnalysisPasses(FPM); 259 260 FPM.add(createCFGSimplificationPass()); 261 FPM.add(createSROAPass()); 262 FPM.add(createEarlyCSEPass()); 263 FPM.add(createLowerExpectIntrinsicPass()); 264 } 265 266 // Do PGO instrumentation generation or use pass as the option specified. 267 void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) { 268 if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty()) 269 return; 270 // Perform the preinline and cleanup passes for O1 and above. 271 // And avoid doing them if optimizing for size. 272 if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner && 273 PGOSampleUse.empty()) { 274 // Create preinline pass. We construct an InlineParams object and specify 275 // the threshold here to avoid the command line options of the regular 276 // inliner to influence pre-inlining. The only fields of InlineParams we 277 // care about are DefaultThreshold and HintThreshold. 278 InlineParams IP; 279 IP.DefaultThreshold = PreInlineThreshold; 280 // FIXME: The hint threshold has the same value used by the regular inliner. 281 // This should probably be lowered after performance testing. 282 IP.HintThreshold = 325; 283 284 MPM.add(createFunctionInliningPass(IP)); 285 MPM.add(createSROAPass()); 286 MPM.add(createEarlyCSEPass()); // Catch trivial redundancies 287 MPM.add(createCFGSimplificationPass()); // Merge & remove BBs 288 MPM.add(createInstructionCombiningPass()); // Combine silly seq's 289 addExtensionsToPM(EP_Peephole, MPM); 290 } 291 if (EnablePGOInstrGen) { 292 MPM.add(createPGOInstrumentationGenLegacyPass()); 293 // Add the profile lowering pass. 294 InstrProfOptions Options; 295 if (!PGOInstrGen.empty()) 296 Options.InstrProfileOutput = PGOInstrGen; 297 Options.DoCounterPromotion = true; 298 MPM.add(createLoopRotatePass()); 299 MPM.add(createInstrProfilingLegacyPass(Options)); 300 } 301 if (!PGOInstrUse.empty()) 302 MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse)); 303 // Indirect call promotion that promotes intra-module targets only. 304 // For ThinLTO this is done earlier due to interactions with globalopt 305 // for imported functions. We don't run this at -O0. 306 if (OptLevel > 0) 307 MPM.add( 308 createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty())); 309 } 310 void PassManagerBuilder::addFunctionSimplificationPasses( 311 legacy::PassManagerBase &MPM) { 312 // Start of function pass. 313 // Break up aggregate allocas, using SSAUpdater. 314 MPM.add(createSROAPass()); 315 MPM.add(createEarlyCSEPass(EnableEarlyCSEMemSSA)); // Catch trivial redundancies 316 if (EnableGVNHoist) 317 MPM.add(createGVNHoistPass()); 318 if (EnableGVNSink) { 319 MPM.add(createGVNSinkPass()); 320 MPM.add(createCFGSimplificationPass()); 321 } 322 323 // Speculative execution if the target has divergent branches; otherwise nop. 324 MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); 325 MPM.add(createJumpThreadingPass()); // Thread jumps. 326 MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals 327 MPM.add(createCFGSimplificationPass()); // Merge & remove BBs 328 // Combine silly seq's 329 if (OptLevel > 2) 330 MPM.add(createAggressiveInstCombinerPass()); 331 addInstructionCombiningPass(MPM); 332 if (SizeLevel == 0 && !DisableLibCallsShrinkWrap) 333 MPM.add(createLibCallsShrinkWrapPass()); 334 addExtensionsToPM(EP_Peephole, MPM); 335 336 // Optimize memory intrinsic calls based on the profiled size information. 337 if (SizeLevel == 0) 338 MPM.add(createPGOMemOPSizeOptLegacyPass()); 339 340 MPM.add(createTailCallEliminationPass()); // Eliminate tail calls 341 MPM.add(createCFGSimplificationPass()); // Merge & remove BBs 342 MPM.add(createReassociatePass()); // Reassociate expressions 343 344 // Begin the loop pass pipeline. 345 if (EnableSimpleLoopUnswitch) { 346 // The simple loop unswitch pass relies on separate cleanup passes. Schedule 347 // them first so when we re-process a loop they run before other loop 348 // passes. 349 MPM.add(createLoopInstSimplifyPass()); 350 MPM.add(createLoopSimplifyCFGPass()); 351 } 352 // Rotate Loop - disable header duplication at -Oz 353 MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); 354 MPM.add(createLICMPass()); // Hoist loop invariants 355 if (EnableSimpleLoopUnswitch) 356 MPM.add(createSimpleLoopUnswitchLegacyPass()); 357 else 358 MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); 359 // FIXME: We break the loop pass pipeline here in order to do full 360 // simplify-cfg. Eventually loop-simplifycfg should be enhanced to replace the 361 // need for this. 362 MPM.add(createCFGSimplificationPass()); 363 addInstructionCombiningPass(MPM); 364 // We resume loop passes creating a second loop pipeline here. 365 MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars 366 MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. 367 addExtensionsToPM(EP_LateLoopOptimizations, MPM); 368 MPM.add(createLoopDeletionPass()); // Delete dead loops 369 370 if (EnableLoopInterchange) { 371 // FIXME: These are function passes and break the loop pass pipeline. 372 MPM.add(createLoopInterchangePass()); // Interchange loops 373 MPM.add(createCFGSimplificationPass()); 374 } 375 if (!DisableUnrollLoops) 376 MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops 377 addExtensionsToPM(EP_LoopOptimizerEnd, MPM); 378 // This ends the loop pass pipelines. 379 380 if (OptLevel > 1) { 381 MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds 382 MPM.add(NewGVN ? createNewGVNPass() 383 : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies 384 } 385 MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset 386 MPM.add(createSCCPPass()); // Constant prop with SCCP 387 388 // Delete dead bit computations (instcombine runs after to fold away the dead 389 // computations, and then ADCE will run later to exploit any new DCE 390 // opportunities that creates). 391 MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations 392 393 // Run instcombine after redundancy elimination to exploit opportunities 394 // opened up by them. 395 addInstructionCombiningPass(MPM); 396 addExtensionsToPM(EP_Peephole, MPM); 397 MPM.add(createJumpThreadingPass()); // Thread jumps 398 MPM.add(createCorrelatedValuePropagationPass()); 399 MPM.add(createDeadStoreEliminationPass()); // Delete dead stores 400 MPM.add(createLICMPass()); 401 402 addExtensionsToPM(EP_ScalarOptimizerLate, MPM); 403 404 if (RerollLoops) 405 MPM.add(createLoopRerollPass()); 406 if (!RunSLPAfterLoopVectorization && SLPVectorize) 407 MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. 408 409 MPM.add(createAggressiveDCEPass()); // Delete dead instructions 410 MPM.add(createCFGSimplificationPass()); // Merge & remove BBs 411 // Clean up after everything. 412 addInstructionCombiningPass(MPM); 413 addExtensionsToPM(EP_Peephole, MPM); 414 } 415 416 void PassManagerBuilder::populateModulePassManager( 417 legacy::PassManagerBase &MPM) { 418 if (!PGOSampleUse.empty()) { 419 MPM.add(createPruneEHPass()); 420 MPM.add(createSampleProfileLoaderPass(PGOSampleUse)); 421 } 422 423 // Allow forcing function attributes as a debugging and tuning aid. 424 MPM.add(createForceFunctionAttrsLegacyPass()); 425 426 // If all optimizations are disabled, just run the always-inline pass and, 427 // if enabled, the function merging pass. 428 if (OptLevel == 0) { 429 addPGOInstrPasses(MPM); 430 if (Inliner) { 431 MPM.add(Inliner); 432 Inliner = nullptr; 433 } 434 435 // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly 436 // creates a CGSCC pass manager, but we don't want to add extensions into 437 // that pass manager. To prevent this we insert a no-op module pass to reset 438 // the pass manager to get the same behavior as EP_OptimizerLast in non-O0 439 // builds. The function merging pass is 440 if (MergeFunctions) 441 MPM.add(createMergeFunctionsPass()); 442 else if (GlobalExtensionsNotEmpty() || !Extensions.empty()) 443 MPM.add(createBarrierNoopPass()); 444 445 if (PerformThinLTO) { 446 // Drop available_externally and unreferenced globals. This is necessary 447 // with ThinLTO in order to avoid leaving undefined references to dead 448 // globals in the object file. 449 MPM.add(createEliminateAvailableExternallyPass()); 450 MPM.add(createGlobalDCEPass()); 451 } 452 453 addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); 454 455 // Rename anon globals to be able to export them in the summary. 456 // This has to be done after we add the extensions to the pass manager 457 // as there could be passes (e.g. Adddress sanitizer) which introduce 458 // new unnamed globals. 459 if (PrepareForLTO || PrepareForThinLTO) 460 MPM.add(createNameAnonGlobalPass()); 461 return; 462 } 463 464 // Add LibraryInfo if we have some. 465 if (LibraryInfo) 466 MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); 467 468 addInitialAliasAnalysisPasses(MPM); 469 470 // For ThinLTO there are two passes of indirect call promotion. The 471 // first is during the compile phase when PerformThinLTO=false and 472 // intra-module indirect call targets are promoted. The second is during 473 // the ThinLTO backend when PerformThinLTO=true, when we promote imported 474 // inter-module indirect calls. For that we perform indirect call promotion 475 // earlier in the pass pipeline, here before globalopt. Otherwise imported 476 // available_externally functions look unreferenced and are removed. 477 if (PerformThinLTO) 478 MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true, 479 !PGOSampleUse.empty())); 480 481 // For SamplePGO in ThinLTO compile phase, we do not want to unroll loops 482 // as it will change the CFG too much to make the 2nd profile annotation 483 // in backend more difficult. 484 bool PrepareForThinLTOUsingPGOSampleProfile = 485 PrepareForThinLTO && !PGOSampleUse.empty(); 486 if (PrepareForThinLTOUsingPGOSampleProfile) 487 DisableUnrollLoops = true; 488 489 // Infer attributes about declarations if possible. 490 MPM.add(createInferFunctionAttrsLegacyPass()); 491 492 addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); 493 494 if (OptLevel > 2) 495 MPM.add(createCallSiteSplittingPass()); 496 497 MPM.add(createIPSCCPPass()); // IP SCCP 498 MPM.add(createCalledValuePropagationPass()); 499 MPM.add(createGlobalOptimizerPass()); // Optimize out global vars 500 // Promote any localized global vars. 501 MPM.add(createPromoteMemoryToRegisterPass()); 502 503 MPM.add(createDeadArgEliminationPass()); // Dead argument elimination 504 505 addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE 506 addExtensionsToPM(EP_Peephole, MPM); 507 MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE 508 509 // For SamplePGO in ThinLTO compile phase, we do not want to do indirect 510 // call promotion as it will change the CFG too much to make the 2nd 511 // profile annotation in backend more difficult. 512 // PGO instrumentation is added during the compile phase for ThinLTO, do 513 // not run it a second time 514 if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile) 515 addPGOInstrPasses(MPM); 516 517 // We add a module alias analysis pass here. In part due to bugs in the 518 // analysis infrastructure this "works" in that the analysis stays alive 519 // for the entire SCC pass run below. 520 MPM.add(createGlobalsAAWrapperPass()); 521 522 // Start of CallGraph SCC passes. 523 MPM.add(createPruneEHPass()); // Remove dead EH info 524 bool RunInliner = false; 525 if (Inliner) { 526 MPM.add(Inliner); 527 Inliner = nullptr; 528 RunInliner = true; 529 } 530 531 MPM.add(createPostOrderFunctionAttrsLegacyPass()); 532 if (OptLevel > 2) 533 MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args 534 535 addExtensionsToPM(EP_CGSCCOptimizerLate, MPM); 536 addFunctionSimplificationPasses(MPM); 537 538 // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC 539 // pass manager that we are specifically trying to avoid. To prevent this 540 // we must insert a no-op module pass to reset the pass manager. 541 MPM.add(createBarrierNoopPass()); 542 543 if (RunPartialInlining) 544 MPM.add(createPartialInliningPass()); 545 546 if (OptLevel > 1 && !PrepareForLTO && !PrepareForThinLTO) 547 // Remove avail extern fns and globals definitions if we aren't 548 // compiling an object file for later LTO. For LTO we want to preserve 549 // these so they are eligible for inlining at link-time. Note if they 550 // are unreferenced they will be removed by GlobalDCE later, so 551 // this only impacts referenced available externally globals. 552 // Eventually they will be suppressed during codegen, but eliminating 553 // here enables more opportunity for GlobalDCE as it may make 554 // globals referenced by available external functions dead 555 // and saves running remaining passes on the eliminated functions. 556 MPM.add(createEliminateAvailableExternallyPass()); 557 558 MPM.add(createReversePostOrderFunctionAttrsPass()); 559 560 // The inliner performs some kind of dead code elimination as it goes, 561 // but there are cases that are not really caught by it. We might 562 // at some point consider teaching the inliner about them, but it 563 // is OK for now to run GlobalOpt + GlobalDCE in tandem as their 564 // benefits generally outweight the cost, making the whole pipeline 565 // faster. 566 if (RunInliner) { 567 MPM.add(createGlobalOptimizerPass()); 568 MPM.add(createGlobalDCEPass()); 569 } 570 571 // If we are planning to perform ThinLTO later, let's not bloat the code with 572 // unrolling/vectorization/... now. We'll first run the inliner + CGSCC passes 573 // during ThinLTO and perform the rest of the optimizations afterward. 574 if (PrepareForThinLTO) { 575 // Ensure we perform any last passes, but do so before renaming anonymous 576 // globals in case the passes add any. 577 addExtensionsToPM(EP_OptimizerLast, MPM); 578 // Rename anon globals to be able to export them in the summary. 579 MPM.add(createNameAnonGlobalPass()); 580 return; 581 } 582 583 if (PerformThinLTO) 584 // Optimize globals now when performing ThinLTO, this enables more 585 // optimizations later. 586 MPM.add(createGlobalOptimizerPass()); 587 588 // Scheduling LoopVersioningLICM when inlining is over, because after that 589 // we may see more accurate aliasing. Reason to run this late is that too 590 // early versioning may prevent further inlining due to increase of code 591 // size. By placing it just after inlining other optimizations which runs 592 // later might get benefit of no-alias assumption in clone loop. 593 if (UseLoopVersioningLICM) { 594 MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM 595 MPM.add(createLICMPass()); // Hoist loop invariants 596 } 597 598 // We add a fresh GlobalsModRef run at this point. This is particularly 599 // useful as the above will have inlined, DCE'ed, and function-attr 600 // propagated everything. We should at this point have a reasonably minimal 601 // and richly annotated call graph. By computing aliasing and mod/ref 602 // information for all local globals here, the late loop passes and notably 603 // the vectorizer will be able to use them to help recognize vectorizable 604 // memory operations. 605 // 606 // Note that this relies on a bug in the pass manager which preserves 607 // a module analysis into a function pass pipeline (and throughout it) so 608 // long as the first function pass doesn't invalidate the module analysis. 609 // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for 610 // this to work. Fortunately, it is trivial to preserve AliasAnalysis 611 // (doing nothing preserves it as it is required to be conservatively 612 // correct in the face of IR changes). 613 MPM.add(createGlobalsAAWrapperPass()); 614 615 MPM.add(createFloat2IntPass()); 616 617 addExtensionsToPM(EP_VectorizerStart, MPM); 618 619 // Re-rotate loops in all our loop nests. These may have fallout out of 620 // rotated form due to GVN or other transformations, and the vectorizer relies 621 // on the rotated form. Disable header duplication at -Oz. 622 MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); 623 624 // Distribute loops to allow partial vectorization. I.e. isolate dependences 625 // into separate loop that would otherwise inhibit vectorization. This is 626 // currently only performed for loops marked with the metadata 627 // llvm.loop.distribute=true or when -enable-loop-distribute is specified. 628 MPM.add(createLoopDistributePass()); 629 630 MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize)); 631 632 // Eliminate loads by forwarding stores from the previous iteration to loads 633 // of the current iteration. 634 MPM.add(createLoopLoadEliminationPass()); 635 636 // FIXME: Because of #pragma vectorize enable, the passes below are always 637 // inserted in the pipeline, even when the vectorizer doesn't run (ex. when 638 // on -O1 and no #pragma is found). Would be good to have these two passes 639 // as function calls, so that we can only pass them when the vectorizer 640 // changed the code. 641 addInstructionCombiningPass(MPM); 642 if (OptLevel > 1 && ExtraVectorizerPasses) { 643 // At higher optimization levels, try to clean up any runtime overlap and 644 // alignment checks inserted by the vectorizer. We want to track correllated 645 // runtime checks for two inner loops in the same outer loop, fold any 646 // common computations, hoist loop-invariant aspects out of any outer loop, 647 // and unswitch the runtime checks if possible. Once hoisted, we may have 648 // dead (or speculatable) control flows or more combining opportunities. 649 MPM.add(createEarlyCSEPass()); 650 MPM.add(createCorrelatedValuePropagationPass()); 651 addInstructionCombiningPass(MPM); 652 MPM.add(createLICMPass()); 653 MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); 654 MPM.add(createCFGSimplificationPass()); 655 addInstructionCombiningPass(MPM); 656 } 657 658 // Cleanup after loop vectorization, etc. Simplification passes like CVP and 659 // GVN, loop transforms, and others have already run, so it's now better to 660 // convert to more optimized IR using more aggressive simplify CFG options. 661 // The extra sinking transform can create larger basic blocks, so do this 662 // before SLP vectorization. 663 MPM.add(createCFGSimplificationPass(1, true, true, false, true)); 664 665 if (RunSLPAfterLoopVectorization && SLPVectorize) { 666 MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. 667 if (OptLevel > 1 && ExtraVectorizerPasses) { 668 MPM.add(createEarlyCSEPass()); 669 } 670 } 671 672 addExtensionsToPM(EP_Peephole, MPM); 673 addInstructionCombiningPass(MPM); 674 675 if (!DisableUnrollLoops) { 676 if (EnableUnrollAndJam) { 677 // Unroll and Jam. We do this before unroll but need to be in a separate 678 // loop pass manager in order for the outer loop to be processed by 679 // unroll and jam before the inner loop is unrolled. 680 MPM.add(createLoopUnrollAndJamPass(OptLevel)); 681 } 682 683 MPM.add(createLoopUnrollPass(OptLevel)); // Unroll small loops 684 685 // LoopUnroll may generate some redundency to cleanup. 686 addInstructionCombiningPass(MPM); 687 688 // Runtime unrolling will introduce runtime check in loop prologue. If the 689 // unrolled loop is a inner loop, then the prologue will be inside the 690 // outer loop. LICM pass can help to promote the runtime check out if the 691 // checked value is loop invariant. 692 MPM.add(createLICMPass()); 693 } 694 695 // After vectorization and unrolling, assume intrinsics may tell us more 696 // about pointer alignments. 697 MPM.add(createAlignmentFromAssumptionsPass()); 698 699 // FIXME: We shouldn't bother with this anymore. 700 MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes 701 702 // GlobalOpt already deletes dead functions and globals, at -O2 try a 703 // late pass of GlobalDCE. It is capable of deleting dead cycles. 704 if (OptLevel > 1) { 705 MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. 706 MPM.add(createConstantMergePass()); // Merge dup global constants 707 } 708 709 if (MergeFunctions) 710 MPM.add(createMergeFunctionsPass()); 711 712 // LoopSink pass sinks instructions hoisted by LICM, which serves as a 713 // canonicalization pass that enables other optimizations. As a result, 714 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM 715 // result too early. 716 MPM.add(createLoopSinkPass()); 717 // Get rid of LCSSA nodes. 718 MPM.add(createInstSimplifyLegacyPass()); 719 720 // This hoists/decomposes div/rem ops. It should run after other sink/hoist 721 // passes to avoid re-sinking, but before SimplifyCFG because it can allow 722 // flattening of blocks. 723 MPM.add(createDivRemPairsPass()); 724 725 // LoopSink (and other loop passes since the last simplifyCFG) might have 726 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. 727 MPM.add(createCFGSimplificationPass()); 728 729 addExtensionsToPM(EP_OptimizerLast, MPM); 730 731 // Rename anon globals to be able to handle them in the summary 732 if (PrepareForLTO) 733 MPM.add(createNameAnonGlobalPass()); 734 } 735 736 void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { 737 // Remove unused virtual tables to improve the quality of code generated by 738 // whole-program devirtualization and bitset lowering. 739 PM.add(createGlobalDCEPass()); 740 741 // Provide AliasAnalysis services for optimizations. 742 addInitialAliasAnalysisPasses(PM); 743 744 // Allow forcing function attributes as a debugging and tuning aid. 745 PM.add(createForceFunctionAttrsLegacyPass()); 746 747 // Infer attributes about declarations if possible. 748 PM.add(createInferFunctionAttrsLegacyPass()); 749 750 if (OptLevel > 1) { 751 // Split call-site with more constrained arguments. 752 PM.add(createCallSiteSplittingPass()); 753 754 // Indirect call promotion. This should promote all the targets that are 755 // left by the earlier promotion pass that promotes intra-module targets. 756 // This two-step promotion is to save the compile time. For LTO, it should 757 // produce the same result as if we only do promotion here. 758 PM.add( 759 createPGOIndirectCallPromotionLegacyPass(true, !PGOSampleUse.empty())); 760 761 // Propagate constants at call sites into the functions they call. This 762 // opens opportunities for globalopt (and inlining) by substituting function 763 // pointers passed as arguments to direct uses of functions. 764 PM.add(createIPSCCPPass()); 765 766 // Attach metadata to indirect call sites indicating the set of functions 767 // they may target at run-time. This should follow IPSCCP. 768 PM.add(createCalledValuePropagationPass()); 769 } 770 771 // Infer attributes about definitions. The readnone attribute in particular is 772 // required for virtual constant propagation. 773 PM.add(createPostOrderFunctionAttrsLegacyPass()); 774 PM.add(createReversePostOrderFunctionAttrsPass()); 775 776 // Split globals using inrange annotations on GEP indices. This can help 777 // improve the quality of generated code when virtual constant propagation or 778 // control flow integrity are enabled. 779 PM.add(createGlobalSplitPass()); 780 781 // Apply whole-program devirtualization and virtual constant propagation. 782 PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr)); 783 784 // That's all we need at opt level 1. 785 if (OptLevel == 1) 786 return; 787 788 // Now that we internalized some globals, see if we can hack on them! 789 PM.add(createGlobalOptimizerPass()); 790 // Promote any localized global vars. 791 PM.add(createPromoteMemoryToRegisterPass()); 792 793 // Linking modules together can lead to duplicated global constants, only 794 // keep one copy of each constant. 795 PM.add(createConstantMergePass()); 796 797 // Remove unused arguments from functions. 798 PM.add(createDeadArgEliminationPass()); 799 800 // Reduce the code after globalopt and ipsccp. Both can open up significant 801 // simplification opportunities, and both can propagate functions through 802 // function pointers. When this happens, we often have to resolve varargs 803 // calls, etc, so let instcombine do this. 804 if (OptLevel > 2) 805 PM.add(createAggressiveInstCombinerPass()); 806 addInstructionCombiningPass(PM); 807 addExtensionsToPM(EP_Peephole, PM); 808 809 // Inline small functions 810 bool RunInliner = Inliner; 811 if (RunInliner) { 812 PM.add(Inliner); 813 Inliner = nullptr; 814 } 815 816 PM.add(createPruneEHPass()); // Remove dead EH info. 817 818 // Optimize globals again if we ran the inliner. 819 if (RunInliner) 820 PM.add(createGlobalOptimizerPass()); 821 PM.add(createGlobalDCEPass()); // Remove dead functions. 822 823 // If we didn't decide to inline a function, check to see if we can 824 // transform it to pass arguments by value instead of by reference. 825 PM.add(createArgumentPromotionPass()); 826 827 // The IPO passes may leave cruft around. Clean up after them. 828 addInstructionCombiningPass(PM); 829 addExtensionsToPM(EP_Peephole, PM); 830 PM.add(createJumpThreadingPass()); 831 832 // Break up allocas 833 PM.add(createSROAPass()); 834 835 // Run a few AA driven optimizations here and now, to cleanup the code. 836 PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture. 837 PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. 838 839 PM.add(createLICMPass()); // Hoist loop invariants. 840 PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds. 841 PM.add(NewGVN ? createNewGVNPass() 842 : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. 843 PM.add(createMemCpyOptPass()); // Remove dead memcpys. 844 845 // Nuke dead stores. 846 PM.add(createDeadStoreEliminationPass()); 847 848 // More loops are countable; try to optimize them. 849 PM.add(createIndVarSimplifyPass()); 850 PM.add(createLoopDeletionPass()); 851 if (EnableLoopInterchange) 852 PM.add(createLoopInterchangePass()); 853 854 if (!DisableUnrollLoops) 855 PM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops 856 PM.add(createLoopVectorizePass(true, LoopVectorize)); 857 // The vectorizer may have significantly shortened a loop body; unroll again. 858 if (!DisableUnrollLoops) 859 PM.add(createLoopUnrollPass(OptLevel)); 860 861 // Now that we've optimized loops (in particular loop induction variables), 862 // we may have exposed more scalar opportunities. Run parts of the scalar 863 // optimizer again at this point. 864 addInstructionCombiningPass(PM); // Initial cleanup 865 PM.add(createCFGSimplificationPass()); // if-convert 866 PM.add(createSCCPPass()); // Propagate exposed constants 867 addInstructionCombiningPass(PM); // Clean up again 868 PM.add(createBitTrackingDCEPass()); 869 870 // More scalar chains could be vectorized due to more alias information 871 if (RunSLPAfterLoopVectorization) 872 if (SLPVectorize) 873 PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. 874 875 // After vectorization, assume intrinsics may tell us more about pointer 876 // alignments. 877 PM.add(createAlignmentFromAssumptionsPass()); 878 879 // Cleanup and simplify the code after the scalar optimizations. 880 addInstructionCombiningPass(PM); 881 addExtensionsToPM(EP_Peephole, PM); 882 883 PM.add(createJumpThreadingPass()); 884 } 885 886 void PassManagerBuilder::addLateLTOOptimizationPasses( 887 legacy::PassManagerBase &PM) { 888 // Delete basic blocks, which optimization passes may have killed. 889 PM.add(createCFGSimplificationPass()); 890 891 // Drop bodies of available externally objects to improve GlobalDCE. 892 PM.add(createEliminateAvailableExternallyPass()); 893 894 // Now that we have optimized the program, discard unreachable functions. 895 PM.add(createGlobalDCEPass()); 896 897 // FIXME: this is profitable (for compiler time) to do at -O0 too, but 898 // currently it damages debug info. 899 if (MergeFunctions) 900 PM.add(createMergeFunctionsPass()); 901 } 902 903 void PassManagerBuilder::populateThinLTOPassManager( 904 legacy::PassManagerBase &PM) { 905 PerformThinLTO = true; 906 if (LibraryInfo) 907 PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); 908 909 if (VerifyInput) 910 PM.add(createVerifierPass()); 911 912 if (ImportSummary) { 913 // These passes import type identifier resolutions for whole-program 914 // devirtualization and CFI. They must run early because other passes may 915 // disturb the specific instruction patterns that these passes look for, 916 // creating dependencies on resolutions that may not appear in the summary. 917 // 918 // For example, GVN may transform the pattern assume(type.test) appearing in 919 // two basic blocks into assume(phi(type.test, type.test)), which would 920 // transform a dependency on a WPD resolution into a dependency on a type 921 // identifier resolution for CFI. 922 // 923 // Also, WPD has access to more precise information than ICP and can 924 // devirtualize more effectively, so it should operate on the IR first. 925 PM.add(createWholeProgramDevirtPass(nullptr, ImportSummary)); 926 PM.add(createLowerTypeTestsPass(nullptr, ImportSummary)); 927 } 928 929 populateModulePassManager(PM); 930 931 if (VerifyOutput) 932 PM.add(createVerifierPass()); 933 PerformThinLTO = false; 934 } 935 936 void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { 937 if (LibraryInfo) 938 PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); 939 940 if (VerifyInput) 941 PM.add(createVerifierPass()); 942 943 if (OptLevel != 0) 944 addLTOOptimizationPasses(PM); 945 else { 946 // The whole-program-devirt pass needs to run at -O0 because only it knows 947 // about the llvm.type.checked.load intrinsic: it needs to both lower the 948 // intrinsic itself and handle it in the summary. 949 PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr)); 950 } 951 952 // Create a function that performs CFI checks for cross-DSO calls with targets 953 // in the current module. 954 PM.add(createCrossDSOCFIPass()); 955 956 // Lower type metadata and the type.test intrinsic. This pass supports Clang's 957 // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at 958 // link time if CFI is enabled. The pass does nothing if CFI is disabled. 959 PM.add(createLowerTypeTestsPass(ExportSummary, nullptr)); 960 961 if (OptLevel != 0) 962 addLateLTOOptimizationPasses(PM); 963 964 if (VerifyOutput) 965 PM.add(createVerifierPass()); 966 } 967 968 inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) { 969 return reinterpret_cast<PassManagerBuilder*>(P); 970 } 971 972 inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) { 973 return reinterpret_cast<LLVMPassManagerBuilderRef>(P); 974 } 975 976 LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() { 977 PassManagerBuilder *PMB = new PassManagerBuilder(); 978 return wrap(PMB); 979 } 980 981 void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) { 982 PassManagerBuilder *Builder = unwrap(PMB); 983 delete Builder; 984 } 985 986 void 987 LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB, 988 unsigned OptLevel) { 989 PassManagerBuilder *Builder = unwrap(PMB); 990 Builder->OptLevel = OptLevel; 991 } 992 993 void 994 LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB, 995 unsigned SizeLevel) { 996 PassManagerBuilder *Builder = unwrap(PMB); 997 Builder->SizeLevel = SizeLevel; 998 } 999 1000 void 1001 LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB, 1002 LLVMBool Value) { 1003 // NOTE: The DisableUnitAtATime switch has been removed. 1004 } 1005 1006 void 1007 LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB, 1008 LLVMBool Value) { 1009 PassManagerBuilder *Builder = unwrap(PMB); 1010 Builder->DisableUnrollLoops = Value; 1011 } 1012 1013 void 1014 LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB, 1015 LLVMBool Value) { 1016 // NOTE: The simplify-libcalls pass has been removed. 1017 } 1018 1019 void 1020 LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB, 1021 unsigned Threshold) { 1022 PassManagerBuilder *Builder = unwrap(PMB); 1023 Builder->Inliner = createFunctionInliningPass(Threshold); 1024 } 1025 1026 void 1027 LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB, 1028 LLVMPassManagerRef PM) { 1029 PassManagerBuilder *Builder = unwrap(PMB); 1030 legacy::FunctionPassManager *FPM = unwrap<legacy::FunctionPassManager>(PM); 1031 Builder->populateFunctionPassManager(*FPM); 1032 } 1033 1034 void 1035 LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB, 1036 LLVMPassManagerRef PM) { 1037 PassManagerBuilder *Builder = unwrap(PMB); 1038 legacy::PassManagerBase *MPM = unwrap(PM); 1039 Builder->populateModulePassManager(*MPM); 1040 } 1041 1042 void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB, 1043 LLVMPassManagerRef PM, 1044 LLVMBool Internalize, 1045 LLVMBool RunInliner) { 1046 PassManagerBuilder *Builder = unwrap(PMB); 1047 legacy::PassManagerBase *LPM = unwrap(PM); 1048 1049 // A small backwards compatibility hack. populateLTOPassManager used to take 1050 // an RunInliner option. 1051 if (RunInliner && !Builder->Inliner) 1052 Builder->Inliner = createFunctionInliningPass(); 1053 1054 Builder->populateLTOPassManager(*LPM); 1055 } 1056