1 //===- ObjCARCOpts.cpp - ObjC ARC Optimization ----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// \file 10 /// This file defines ObjC ARC optimizations. ARC stands for Automatic 11 /// Reference Counting and is a system for managing reference counts for objects 12 /// in Objective C. 13 /// 14 /// The optimizations performed include elimination of redundant, partially 15 /// redundant, and inconsequential reference count operations, elimination of 16 /// redundant weak pointer operations, and numerous minor simplifications. 17 /// 18 /// WARNING: This file knows about certain library functions. It recognizes them 19 /// by name, and hardwires knowledge of their semantics. 20 /// 21 /// WARNING: This file knows about how certain Objective-C library functions are 22 /// used. Naive LLVM IR transformations which would otherwise be 23 /// behavior-preserving may break these assumptions. 24 /// 25 //===----------------------------------------------------------------------===// 26 27 #include "ObjCARC.h" 28 #include "ARCRuntimeEntryPoints.h" 29 #include "BlotMapVector.h" 30 #include "DependencyAnalysis.h" 31 #include "ProvenanceAnalysis.h" 32 #include "PtrState.h" 33 #include "llvm/ADT/DenseMap.h" 34 #include "llvm/ADT/DenseSet.h" 35 #include "llvm/ADT/STLExtras.h" 36 #include "llvm/ADT/SmallPtrSet.h" 37 #include "llvm/ADT/Statistic.h" 38 #include "llvm/Analysis/ObjCARCAliasAnalysis.h" 39 #include "llvm/IR/CFG.h" 40 #include "llvm/IR/IRBuilder.h" 41 #include "llvm/IR/LLVMContext.h" 42 #include "llvm/Support/Debug.h" 43 #include "llvm/Support/raw_ostream.h" 44 45 using namespace llvm; 46 using namespace llvm::objcarc; 47 48 #define DEBUG_TYPE "objc-arc-opts" 49 50 /// \defgroup ARCUtilities Utility declarations/definitions specific to ARC. 51 /// @{ 52 53 /// \brief This is similar to GetRCIdentityRoot but it stops as soon 54 /// as it finds a value with multiple uses. 55 static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { 56 if (Arg->hasOneUse()) { 57 if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg)) 58 return FindSingleUseIdentifiedObject(BC->getOperand(0)); 59 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg)) 60 if (GEP->hasAllZeroIndices()) 61 return FindSingleUseIdentifiedObject(GEP->getPointerOperand()); 62 if (IsForwarding(GetBasicARCInstKind(Arg))) 63 return FindSingleUseIdentifiedObject( 64 cast<CallInst>(Arg)->getArgOperand(0)); 65 if (!IsObjCIdentifiedObject(Arg)) 66 return nullptr; 67 return Arg; 68 } 69 70 // If we found an identifiable object but it has multiple uses, but they are 71 // trivial uses, we can still consider this to be a single-use value. 72 if (IsObjCIdentifiedObject(Arg)) { 73 for (const User *U : Arg->users()) 74 if (!U->use_empty() || GetRCIdentityRoot(U) != Arg) 75 return nullptr; 76 77 return Arg; 78 } 79 80 return nullptr; 81 } 82 83 /// This is a wrapper around getUnderlyingObjCPtr along the lines of 84 /// GetUnderlyingObjects except that it returns early when it sees the first 85 /// alloca. 86 static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V, 87 const DataLayout &DL) { 88 SmallPtrSet<const Value *, 4> Visited; 89 SmallVector<const Value *, 4> Worklist; 90 Worklist.push_back(V); 91 do { 92 const Value *P = Worklist.pop_back_val(); 93 P = GetUnderlyingObjCPtr(P, DL); 94 95 if (isa<AllocaInst>(P)) 96 return true; 97 98 if (!Visited.insert(P).second) 99 continue; 100 101 if (const SelectInst *SI = dyn_cast<const SelectInst>(P)) { 102 Worklist.push_back(SI->getTrueValue()); 103 Worklist.push_back(SI->getFalseValue()); 104 continue; 105 } 106 107 if (const PHINode *PN = dyn_cast<const PHINode>(P)) { 108 for (Value *IncValue : PN->incoming_values()) 109 Worklist.push_back(IncValue); 110 continue; 111 } 112 } while (!Worklist.empty()); 113 114 return false; 115 } 116 117 118 /// @} 119 /// 120 /// \defgroup ARCOpt ARC Optimization. 121 /// @{ 122 123 // TODO: On code like this: 124 // 125 // objc_retain(%x) 126 // stuff_that_cannot_release() 127 // objc_autorelease(%x) 128 // stuff_that_cannot_release() 129 // objc_retain(%x) 130 // stuff_that_cannot_release() 131 // objc_autorelease(%x) 132 // 133 // The second retain and autorelease can be deleted. 134 135 // TODO: It should be possible to delete 136 // objc_autoreleasePoolPush and objc_autoreleasePoolPop 137 // pairs if nothing is actually autoreleased between them. Also, autorelease 138 // calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code 139 // after inlining) can be turned into plain release calls. 140 141 // TODO: Critical-edge splitting. If the optimial insertion point is 142 // a critical edge, the current algorithm has to fail, because it doesn't 143 // know how to split edges. It should be possible to make the optimizer 144 // think in terms of edges, rather than blocks, and then split critical 145 // edges on demand. 146 147 // TODO: OptimizeSequences could generalized to be Interprocedural. 148 149 // TODO: Recognize that a bunch of other objc runtime calls have 150 // non-escaping arguments and non-releasing arguments, and may be 151 // non-autoreleasing. 152 153 // TODO: Sink autorelease calls as far as possible. Unfortunately we 154 // usually can't sink them past other calls, which would be the main 155 // case where it would be useful. 156 157 // TODO: The pointer returned from objc_loadWeakRetained is retained. 158 159 // TODO: Delete release+retain pairs (rare). 160 161 STATISTIC(NumNoops, "Number of no-op objc calls eliminated"); 162 STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated"); 163 STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases"); 164 STATISTIC(NumRets, "Number of return value forwarding " 165 "retain+autoreleases eliminated"); 166 STATISTIC(NumRRs, "Number of retain+release paths eliminated"); 167 STATISTIC(NumPeeps, "Number of calls peephole-optimized"); 168 #ifndef NDEBUG 169 STATISTIC(NumRetainsBeforeOpt, 170 "Number of retains before optimization"); 171 STATISTIC(NumReleasesBeforeOpt, 172 "Number of releases before optimization"); 173 STATISTIC(NumRetainsAfterOpt, 174 "Number of retains after optimization"); 175 STATISTIC(NumReleasesAfterOpt, 176 "Number of releases after optimization"); 177 #endif 178 179 namespace { 180 /// \brief Per-BasicBlock state. 181 class BBState { 182 /// The number of unique control paths from the entry which can reach this 183 /// block. 184 unsigned TopDownPathCount; 185 186 /// The number of unique control paths to exits from this block. 187 unsigned BottomUpPathCount; 188 189 /// The top-down traversal uses this to record information known about a 190 /// pointer at the bottom of each block. 191 BlotMapVector<const Value *, TopDownPtrState> PerPtrTopDown; 192 193 /// The bottom-up traversal uses this to record information known about a 194 /// pointer at the top of each block. 195 BlotMapVector<const Value *, BottomUpPtrState> PerPtrBottomUp; 196 197 /// Effective predecessors of the current block ignoring ignorable edges and 198 /// ignored backedges. 199 SmallVector<BasicBlock *, 2> Preds; 200 201 /// Effective successors of the current block ignoring ignorable edges and 202 /// ignored backedges. 203 SmallVector<BasicBlock *, 2> Succs; 204 205 public: 206 static const unsigned OverflowOccurredValue; 207 208 BBState() : TopDownPathCount(0), BottomUpPathCount(0) { } 209 210 typedef decltype(PerPtrTopDown)::iterator top_down_ptr_iterator; 211 typedef decltype(PerPtrTopDown)::const_iterator const_top_down_ptr_iterator; 212 213 top_down_ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); } 214 top_down_ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); } 215 const_top_down_ptr_iterator top_down_ptr_begin() const { 216 return PerPtrTopDown.begin(); 217 } 218 const_top_down_ptr_iterator top_down_ptr_end() const { 219 return PerPtrTopDown.end(); 220 } 221 bool hasTopDownPtrs() const { 222 return !PerPtrTopDown.empty(); 223 } 224 225 typedef decltype(PerPtrBottomUp)::iterator bottom_up_ptr_iterator; 226 typedef decltype( 227 PerPtrBottomUp)::const_iterator const_bottom_up_ptr_iterator; 228 229 bottom_up_ptr_iterator bottom_up_ptr_begin() { 230 return PerPtrBottomUp.begin(); 231 } 232 bottom_up_ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); } 233 const_bottom_up_ptr_iterator bottom_up_ptr_begin() const { 234 return PerPtrBottomUp.begin(); 235 } 236 const_bottom_up_ptr_iterator bottom_up_ptr_end() const { 237 return PerPtrBottomUp.end(); 238 } 239 bool hasBottomUpPtrs() const { 240 return !PerPtrBottomUp.empty(); 241 } 242 243 /// Mark this block as being an entry block, which has one path from the 244 /// entry by definition. 245 void SetAsEntry() { TopDownPathCount = 1; } 246 247 /// Mark this block as being an exit block, which has one path to an exit by 248 /// definition. 249 void SetAsExit() { BottomUpPathCount = 1; } 250 251 /// Attempt to find the PtrState object describing the top down state for 252 /// pointer Arg. Return a new initialized PtrState describing the top down 253 /// state for Arg if we do not find one. 254 TopDownPtrState &getPtrTopDownState(const Value *Arg) { 255 return PerPtrTopDown[Arg]; 256 } 257 258 /// Attempt to find the PtrState object describing the bottom up state for 259 /// pointer Arg. Return a new initialized PtrState describing the bottom up 260 /// state for Arg if we do not find one. 261 BottomUpPtrState &getPtrBottomUpState(const Value *Arg) { 262 return PerPtrBottomUp[Arg]; 263 } 264 265 /// Attempt to find the PtrState object describing the bottom up state for 266 /// pointer Arg. 267 bottom_up_ptr_iterator findPtrBottomUpState(const Value *Arg) { 268 return PerPtrBottomUp.find(Arg); 269 } 270 271 void clearBottomUpPointers() { 272 PerPtrBottomUp.clear(); 273 } 274 275 void clearTopDownPointers() { 276 PerPtrTopDown.clear(); 277 } 278 279 void InitFromPred(const BBState &Other); 280 void InitFromSucc(const BBState &Other); 281 void MergePred(const BBState &Other); 282 void MergeSucc(const BBState &Other); 283 284 /// Compute the number of possible unique paths from an entry to an exit 285 /// which pass through this block. This is only valid after both the 286 /// top-down and bottom-up traversals are complete. 287 /// 288 /// Returns true if overflow occurred. Returns false if overflow did not 289 /// occur. 290 bool GetAllPathCountWithOverflow(unsigned &PathCount) const { 291 if (TopDownPathCount == OverflowOccurredValue || 292 BottomUpPathCount == OverflowOccurredValue) 293 return true; 294 unsigned long long Product = 295 (unsigned long long)TopDownPathCount*BottomUpPathCount; 296 // Overflow occurred if any of the upper bits of Product are set or if all 297 // the lower bits of Product are all set. 298 return (Product >> 32) || 299 ((PathCount = Product) == OverflowOccurredValue); 300 } 301 302 // Specialized CFG utilities. 303 typedef SmallVectorImpl<BasicBlock *>::const_iterator edge_iterator; 304 edge_iterator pred_begin() const { return Preds.begin(); } 305 edge_iterator pred_end() const { return Preds.end(); } 306 edge_iterator succ_begin() const { return Succs.begin(); } 307 edge_iterator succ_end() const { return Succs.end(); } 308 309 void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); } 310 void addPred(BasicBlock *Pred) { Preds.push_back(Pred); } 311 312 bool isExit() const { return Succs.empty(); } 313 }; 314 315 const unsigned BBState::OverflowOccurredValue = 0xffffffff; 316 } 317 318 namespace llvm { 319 raw_ostream &operator<<(raw_ostream &OS, 320 BBState &BBState) LLVM_ATTRIBUTE_UNUSED; 321 } 322 323 void BBState::InitFromPred(const BBState &Other) { 324 PerPtrTopDown = Other.PerPtrTopDown; 325 TopDownPathCount = Other.TopDownPathCount; 326 } 327 328 void BBState::InitFromSucc(const BBState &Other) { 329 PerPtrBottomUp = Other.PerPtrBottomUp; 330 BottomUpPathCount = Other.BottomUpPathCount; 331 } 332 333 /// The top-down traversal uses this to merge information about predecessors to 334 /// form the initial state for a new block. 335 void BBState::MergePred(const BBState &Other) { 336 if (TopDownPathCount == OverflowOccurredValue) 337 return; 338 339 // Other.TopDownPathCount can be 0, in which case it is either dead or a 340 // loop backedge. Loop backedges are special. 341 TopDownPathCount += Other.TopDownPathCount; 342 343 // In order to be consistent, we clear the top down pointers when by adding 344 // TopDownPathCount becomes OverflowOccurredValue even though "true" overflow 345 // has not occurred. 346 if (TopDownPathCount == OverflowOccurredValue) { 347 clearTopDownPointers(); 348 return; 349 } 350 351 // Check for overflow. If we have overflow, fall back to conservative 352 // behavior. 353 if (TopDownPathCount < Other.TopDownPathCount) { 354 TopDownPathCount = OverflowOccurredValue; 355 clearTopDownPointers(); 356 return; 357 } 358 359 // For each entry in the other set, if our set has an entry with the same key, 360 // merge the entries. Otherwise, copy the entry and merge it with an empty 361 // entry. 362 for (auto MI = Other.top_down_ptr_begin(), ME = Other.top_down_ptr_end(); 363 MI != ME; ++MI) { 364 auto Pair = PerPtrTopDown.insert(*MI); 365 Pair.first->second.Merge(Pair.second ? TopDownPtrState() : MI->second, 366 /*TopDown=*/true); 367 } 368 369 // For each entry in our set, if the other set doesn't have an entry with the 370 // same key, force it to merge with an empty entry. 371 for (auto MI = top_down_ptr_begin(), ME = top_down_ptr_end(); MI != ME; ++MI) 372 if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end()) 373 MI->second.Merge(TopDownPtrState(), /*TopDown=*/true); 374 } 375 376 /// The bottom-up traversal uses this to merge information about successors to 377 /// form the initial state for a new block. 378 void BBState::MergeSucc(const BBState &Other) { 379 if (BottomUpPathCount == OverflowOccurredValue) 380 return; 381 382 // Other.BottomUpPathCount can be 0, in which case it is either dead or a 383 // loop backedge. Loop backedges are special. 384 BottomUpPathCount += Other.BottomUpPathCount; 385 386 // In order to be consistent, we clear the top down pointers when by adding 387 // BottomUpPathCount becomes OverflowOccurredValue even though "true" overflow 388 // has not occurred. 389 if (BottomUpPathCount == OverflowOccurredValue) { 390 clearBottomUpPointers(); 391 return; 392 } 393 394 // Check for overflow. If we have overflow, fall back to conservative 395 // behavior. 396 if (BottomUpPathCount < Other.BottomUpPathCount) { 397 BottomUpPathCount = OverflowOccurredValue; 398 clearBottomUpPointers(); 399 return; 400 } 401 402 // For each entry in the other set, if our set has an entry with the 403 // same key, merge the entries. Otherwise, copy the entry and merge 404 // it with an empty entry. 405 for (auto MI = Other.bottom_up_ptr_begin(), ME = Other.bottom_up_ptr_end(); 406 MI != ME; ++MI) { 407 auto Pair = PerPtrBottomUp.insert(*MI); 408 Pair.first->second.Merge(Pair.second ? BottomUpPtrState() : MI->second, 409 /*TopDown=*/false); 410 } 411 412 // For each entry in our set, if the other set doesn't have an entry 413 // with the same key, force it to merge with an empty entry. 414 for (auto MI = bottom_up_ptr_begin(), ME = bottom_up_ptr_end(); MI != ME; 415 ++MI) 416 if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end()) 417 MI->second.Merge(BottomUpPtrState(), /*TopDown=*/false); 418 } 419 420 raw_ostream &llvm::operator<<(raw_ostream &OS, BBState &BBInfo) { 421 // Dump the pointers we are tracking. 422 OS << " TopDown State:\n"; 423 if (!BBInfo.hasTopDownPtrs()) { 424 DEBUG(llvm::dbgs() << " NONE!\n"); 425 } else { 426 for (auto I = BBInfo.top_down_ptr_begin(), E = BBInfo.top_down_ptr_end(); 427 I != E; ++I) { 428 const PtrState &P = I->second; 429 OS << " Ptr: " << *I->first 430 << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false") 431 << "\n ImpreciseRelease: " 432 << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n" 433 << " HasCFGHazards: " 434 << (P.IsCFGHazardAfflicted()?"true":"false") << "\n" 435 << " KnownPositive: " 436 << (P.HasKnownPositiveRefCount()?"true":"false") << "\n" 437 << " Seq: " 438 << P.GetSeq() << "\n"; 439 } 440 } 441 442 OS << " BottomUp State:\n"; 443 if (!BBInfo.hasBottomUpPtrs()) { 444 DEBUG(llvm::dbgs() << " NONE!\n"); 445 } else { 446 for (auto I = BBInfo.bottom_up_ptr_begin(), E = BBInfo.bottom_up_ptr_end(); 447 I != E; ++I) { 448 const PtrState &P = I->second; 449 OS << " Ptr: " << *I->first 450 << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false") 451 << "\n ImpreciseRelease: " 452 << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n" 453 << " HasCFGHazards: " 454 << (P.IsCFGHazardAfflicted()?"true":"false") << "\n" 455 << " KnownPositive: " 456 << (P.HasKnownPositiveRefCount()?"true":"false") << "\n" 457 << " Seq: " 458 << P.GetSeq() << "\n"; 459 } 460 } 461 462 return OS; 463 } 464 465 namespace { 466 467 /// \brief The main ARC optimization pass. 468 class ObjCARCOpt : public FunctionPass { 469 bool Changed; 470 ProvenanceAnalysis PA; 471 472 /// A cache of references to runtime entry point constants. 473 ARCRuntimeEntryPoints EP; 474 475 /// A cache of MDKinds that can be passed into other functions to propagate 476 /// MDKind identifiers. 477 ARCMDKindCache MDKindCache; 478 479 // This is used to track if a pointer is stored into an alloca. 480 DenseSet<const Value *> MultiOwnersSet; 481 482 /// A flag indicating whether this optimization pass should run. 483 bool Run; 484 485 /// Flags which determine whether each of the interesting runtime functions 486 /// is in fact used in the current function. 487 unsigned UsedInThisFunction; 488 489 bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV); 490 void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, 491 ARCInstKind &Class); 492 void OptimizeIndividualCalls(Function &F); 493 494 void CheckForCFGHazards(const BasicBlock *BB, 495 DenseMap<const BasicBlock *, BBState> &BBStates, 496 BBState &MyStates) const; 497 bool VisitInstructionBottomUp(Instruction *Inst, BasicBlock *BB, 498 BlotMapVector<Value *, RRInfo> &Retains, 499 BBState &MyStates); 500 bool VisitBottomUp(BasicBlock *BB, 501 DenseMap<const BasicBlock *, BBState> &BBStates, 502 BlotMapVector<Value *, RRInfo> &Retains); 503 bool VisitInstructionTopDown(Instruction *Inst, 504 DenseMap<Value *, RRInfo> &Releases, 505 BBState &MyStates); 506 bool VisitTopDown(BasicBlock *BB, 507 DenseMap<const BasicBlock *, BBState> &BBStates, 508 DenseMap<Value *, RRInfo> &Releases); 509 bool Visit(Function &F, DenseMap<const BasicBlock *, BBState> &BBStates, 510 BlotMapVector<Value *, RRInfo> &Retains, 511 DenseMap<Value *, RRInfo> &Releases); 512 513 void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove, 514 BlotMapVector<Value *, RRInfo> &Retains, 515 DenseMap<Value *, RRInfo> &Releases, 516 SmallVectorImpl<Instruction *> &DeadInsts, Module *M); 517 518 bool 519 PairUpRetainsAndReleases(DenseMap<const BasicBlock *, BBState> &BBStates, 520 BlotMapVector<Value *, RRInfo> &Retains, 521 DenseMap<Value *, RRInfo> &Releases, Module *M, 522 SmallVectorImpl<Instruction *> &NewRetains, 523 SmallVectorImpl<Instruction *> &NewReleases, 524 SmallVectorImpl<Instruction *> &DeadInsts, 525 RRInfo &RetainsToMove, RRInfo &ReleasesToMove, 526 Value *Arg, bool KnownSafe, 527 bool &AnyPairsCompletelyEliminated); 528 529 bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates, 530 BlotMapVector<Value *, RRInfo> &Retains, 531 DenseMap<Value *, RRInfo> &Releases, Module *M); 532 533 void OptimizeWeakCalls(Function &F); 534 535 bool OptimizeSequences(Function &F); 536 537 void OptimizeReturns(Function &F); 538 539 #ifndef NDEBUG 540 void GatherStatistics(Function &F, bool AfterOptimization = false); 541 #endif 542 543 void getAnalysisUsage(AnalysisUsage &AU) const override; 544 bool doInitialization(Module &M) override; 545 bool runOnFunction(Function &F) override; 546 void releaseMemory() override; 547 548 public: 549 static char ID; 550 ObjCARCOpt() : FunctionPass(ID) { 551 initializeObjCARCOptPass(*PassRegistry::getPassRegistry()); 552 } 553 }; 554 } 555 556 char ObjCARCOpt::ID = 0; 557 INITIALIZE_PASS_BEGIN(ObjCARCOpt, 558 "objc-arc", "ObjC ARC optimization", false, false) 559 INITIALIZE_PASS_DEPENDENCY(ObjCARCAAWrapperPass) 560 INITIALIZE_PASS_END(ObjCARCOpt, 561 "objc-arc", "ObjC ARC optimization", false, false) 562 563 Pass *llvm::createObjCARCOptPass() { 564 return new ObjCARCOpt(); 565 } 566 567 void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const { 568 AU.addRequired<ObjCARCAAWrapperPass>(); 569 AU.addRequired<AAResultsWrapperPass>(); 570 // ARC optimization doesn't currently split critical edges. 571 AU.setPreservesCFG(); 572 } 573 574 /// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is 575 /// not a return value. Or, if it can be paired with an 576 /// objc_autoreleaseReturnValue, delete the pair and return true. 577 bool 578 ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { 579 // Check for the argument being from an immediately preceding call or invoke. 580 const Value *Arg = GetArgRCIdentityRoot(RetainRV); 581 ImmutableCallSite CS(Arg); 582 if (const Instruction *Call = CS.getInstruction()) { 583 if (Call->getParent() == RetainRV->getParent()) { 584 BasicBlock::const_iterator I(Call); 585 ++I; 586 while (IsNoopInstruction(&*I)) 587 ++I; 588 if (&*I == RetainRV) 589 return false; 590 } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) { 591 BasicBlock *RetainRVParent = RetainRV->getParent(); 592 if (II->getNormalDest() == RetainRVParent) { 593 BasicBlock::const_iterator I = RetainRVParent->begin(); 594 while (IsNoopInstruction(&*I)) 595 ++I; 596 if (&*I == RetainRV) 597 return false; 598 } 599 } 600 } 601 602 // Check for being preceded by an objc_autoreleaseReturnValue on the same 603 // pointer. In this case, we can delete the pair. 604 BasicBlock::iterator I = RetainRV->getIterator(), 605 Begin = RetainRV->getParent()->begin(); 606 if (I != Begin) { 607 do 608 --I; 609 while (I != Begin && IsNoopInstruction(&*I)); 610 if (GetBasicARCInstKind(&*I) == ARCInstKind::AutoreleaseRV && 611 GetArgRCIdentityRoot(&*I) == Arg) { 612 Changed = true; 613 ++NumPeeps; 614 615 DEBUG(dbgs() << "Erasing autoreleaseRV,retainRV pair: " << *I << "\n" 616 << "Erasing " << *RetainRV << "\n"); 617 618 EraseInstruction(&*I); 619 EraseInstruction(RetainRV); 620 return true; 621 } 622 } 623 624 // Turn it to a plain objc_retain. 625 Changed = true; 626 ++NumPeeps; 627 628 DEBUG(dbgs() << "Transforming objc_retainAutoreleasedReturnValue => " 629 "objc_retain since the operand is not a return value.\n" 630 "Old = " << *RetainRV << "\n"); 631 632 Constant *NewDecl = EP.get(ARCRuntimeEntryPointKind::Retain); 633 cast<CallInst>(RetainRV)->setCalledFunction(NewDecl); 634 635 DEBUG(dbgs() << "New = " << *RetainRV << "\n"); 636 637 return false; 638 } 639 640 /// Turn objc_autoreleaseReturnValue into objc_autorelease if the result is not 641 /// used as a return value. 642 void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, 643 Instruction *AutoreleaseRV, 644 ARCInstKind &Class) { 645 // Check for a return of the pointer value. 646 const Value *Ptr = GetArgRCIdentityRoot(AutoreleaseRV); 647 SmallVector<const Value *, 2> Users; 648 Users.push_back(Ptr); 649 do { 650 Ptr = Users.pop_back_val(); 651 for (const User *U : Ptr->users()) { 652 if (isa<ReturnInst>(U) || GetBasicARCInstKind(U) == ARCInstKind::RetainRV) 653 return; 654 if (isa<BitCastInst>(U)) 655 Users.push_back(U); 656 } 657 } while (!Users.empty()); 658 659 Changed = true; 660 ++NumPeeps; 661 662 DEBUG(dbgs() << "Transforming objc_autoreleaseReturnValue => " 663 "objc_autorelease since its operand is not used as a return " 664 "value.\n" 665 "Old = " << *AutoreleaseRV << "\n"); 666 667 CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV); 668 Constant *NewDecl = EP.get(ARCRuntimeEntryPointKind::Autorelease); 669 AutoreleaseRVCI->setCalledFunction(NewDecl); 670 AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease. 671 Class = ARCInstKind::Autorelease; 672 673 DEBUG(dbgs() << "New: " << *AutoreleaseRV << "\n"); 674 675 } 676 677 /// Visit each call, one at a time, and make simplifications without doing any 678 /// additional analysis. 679 void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { 680 DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeIndividualCalls ==\n"); 681 // Reset all the flags in preparation for recomputing them. 682 UsedInThisFunction = 0; 683 684 // Visit all objc_* calls in F. 685 for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { 686 Instruction *Inst = &*I++; 687 688 ARCInstKind Class = GetBasicARCInstKind(Inst); 689 690 DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n"); 691 692 switch (Class) { 693 default: break; 694 695 // Delete no-op casts. These function calls have special semantics, but 696 // the semantics are entirely implemented via lowering in the front-end, 697 // so by the time they reach the optimizer, they are just no-op calls 698 // which return their argument. 699 // 700 // There are gray areas here, as the ability to cast reference-counted 701 // pointers to raw void* and back allows code to break ARC assumptions, 702 // however these are currently considered to be unimportant. 703 case ARCInstKind::NoopCast: 704 Changed = true; 705 ++NumNoops; 706 DEBUG(dbgs() << "Erasing no-op cast: " << *Inst << "\n"); 707 EraseInstruction(Inst); 708 continue; 709 710 // If the pointer-to-weak-pointer is null, it's undefined behavior. 711 case ARCInstKind::StoreWeak: 712 case ARCInstKind::LoadWeak: 713 case ARCInstKind::LoadWeakRetained: 714 case ARCInstKind::InitWeak: 715 case ARCInstKind::DestroyWeak: { 716 CallInst *CI = cast<CallInst>(Inst); 717 if (IsNullOrUndef(CI->getArgOperand(0))) { 718 Changed = true; 719 Type *Ty = CI->getArgOperand(0)->getType(); 720 new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), 721 Constant::getNullValue(Ty), 722 CI); 723 llvm::Value *NewValue = UndefValue::get(CI->getType()); 724 DEBUG(dbgs() << "A null pointer-to-weak-pointer is undefined behavior." 725 "\nOld = " << *CI << "\nNew = " << *NewValue << "\n"); 726 CI->replaceAllUsesWith(NewValue); 727 CI->eraseFromParent(); 728 continue; 729 } 730 break; 731 } 732 case ARCInstKind::CopyWeak: 733 case ARCInstKind::MoveWeak: { 734 CallInst *CI = cast<CallInst>(Inst); 735 if (IsNullOrUndef(CI->getArgOperand(0)) || 736 IsNullOrUndef(CI->getArgOperand(1))) { 737 Changed = true; 738 Type *Ty = CI->getArgOperand(0)->getType(); 739 new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), 740 Constant::getNullValue(Ty), 741 CI); 742 743 llvm::Value *NewValue = UndefValue::get(CI->getType()); 744 DEBUG(dbgs() << "A null pointer-to-weak-pointer is undefined behavior." 745 "\nOld = " << *CI << "\nNew = " << *NewValue << "\n"); 746 747 CI->replaceAllUsesWith(NewValue); 748 CI->eraseFromParent(); 749 continue; 750 } 751 break; 752 } 753 case ARCInstKind::RetainRV: 754 if (OptimizeRetainRVCall(F, Inst)) 755 continue; 756 break; 757 case ARCInstKind::AutoreleaseRV: 758 OptimizeAutoreleaseRVCall(F, Inst, Class); 759 break; 760 } 761 762 // objc_autorelease(x) -> objc_release(x) if x is otherwise unused. 763 if (IsAutorelease(Class) && Inst->use_empty()) { 764 CallInst *Call = cast<CallInst>(Inst); 765 const Value *Arg = Call->getArgOperand(0); 766 Arg = FindSingleUseIdentifiedObject(Arg); 767 if (Arg) { 768 Changed = true; 769 ++NumAutoreleases; 770 771 // Create the declaration lazily. 772 LLVMContext &C = Inst->getContext(); 773 774 Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Release); 775 CallInst *NewCall = CallInst::Create(Decl, Call->getArgOperand(0), "", 776 Call); 777 NewCall->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease), 778 MDNode::get(C, None)); 779 780 DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) " 781 "since x is otherwise unused.\nOld: " << *Call << "\nNew: " 782 << *NewCall << "\n"); 783 784 EraseInstruction(Call); 785 Inst = NewCall; 786 Class = ARCInstKind::Release; 787 } 788 } 789 790 // For functions which can never be passed stack arguments, add 791 // a tail keyword. 792 if (IsAlwaysTail(Class)) { 793 Changed = true; 794 DEBUG(dbgs() << "Adding tail keyword to function since it can never be " 795 "passed stack args: " << *Inst << "\n"); 796 cast<CallInst>(Inst)->setTailCall(); 797 } 798 799 // Ensure that functions that can never have a "tail" keyword due to the 800 // semantics of ARC truly do not do so. 801 if (IsNeverTail(Class)) { 802 Changed = true; 803 DEBUG(dbgs() << "Removing tail keyword from function: " << *Inst << 804 "\n"); 805 cast<CallInst>(Inst)->setTailCall(false); 806 } 807 808 // Set nounwind as needed. 809 if (IsNoThrow(Class)) { 810 Changed = true; 811 DEBUG(dbgs() << "Found no throw class. Setting nounwind on: " << *Inst 812 << "\n"); 813 cast<CallInst>(Inst)->setDoesNotThrow(); 814 } 815 816 if (!IsNoopOnNull(Class)) { 817 UsedInThisFunction |= 1 << unsigned(Class); 818 continue; 819 } 820 821 const Value *Arg = GetArgRCIdentityRoot(Inst); 822 823 // ARC calls with null are no-ops. Delete them. 824 if (IsNullOrUndef(Arg)) { 825 Changed = true; 826 ++NumNoops; 827 DEBUG(dbgs() << "ARC calls with null are no-ops. Erasing: " << *Inst 828 << "\n"); 829 EraseInstruction(Inst); 830 continue; 831 } 832 833 // Keep track of which of retain, release, autorelease, and retain_block 834 // are actually present in this function. 835 UsedInThisFunction |= 1 << unsigned(Class); 836 837 // If Arg is a PHI, and one or more incoming values to the 838 // PHI are null, and the call is control-equivalent to the PHI, and there 839 // are no relevant side effects between the PHI and the call, the call 840 // could be pushed up to just those paths with non-null incoming values. 841 // For now, don't bother splitting critical edges for this. 842 SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist; 843 Worklist.push_back(std::make_pair(Inst, Arg)); 844 do { 845 std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val(); 846 Inst = Pair.first; 847 Arg = Pair.second; 848 849 const PHINode *PN = dyn_cast<PHINode>(Arg); 850 if (!PN) continue; 851 852 // Determine if the PHI has any null operands, or any incoming 853 // critical edges. 854 bool HasNull = false; 855 bool HasCriticalEdges = false; 856 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 857 Value *Incoming = 858 GetRCIdentityRoot(PN->getIncomingValue(i)); 859 if (IsNullOrUndef(Incoming)) 860 HasNull = true; 861 else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back()) 862 .getNumSuccessors() != 1) { 863 HasCriticalEdges = true; 864 break; 865 } 866 } 867 // If we have null operands and no critical edges, optimize. 868 if (!HasCriticalEdges && HasNull) { 869 SmallPtrSet<Instruction *, 4> DependingInstructions; 870 SmallPtrSet<const BasicBlock *, 4> Visited; 871 872 // Check that there is nothing that cares about the reference 873 // count between the call and the phi. 874 switch (Class) { 875 case ARCInstKind::Retain: 876 case ARCInstKind::RetainBlock: 877 // These can always be moved up. 878 break; 879 case ARCInstKind::Release: 880 // These can't be moved across things that care about the retain 881 // count. 882 FindDependencies(NeedsPositiveRetainCount, Arg, 883 Inst->getParent(), Inst, 884 DependingInstructions, Visited, PA); 885 break; 886 case ARCInstKind::Autorelease: 887 // These can't be moved across autorelease pool scope boundaries. 888 FindDependencies(AutoreleasePoolBoundary, Arg, 889 Inst->getParent(), Inst, 890 DependingInstructions, Visited, PA); 891 break; 892 case ARCInstKind::ClaimRV: 893 case ARCInstKind::RetainRV: 894 case ARCInstKind::AutoreleaseRV: 895 // Don't move these; the RV optimization depends on the autoreleaseRV 896 // being tail called, and the retainRV being immediately after a call 897 // (which might still happen if we get lucky with codegen layout, but 898 // it's not worth taking the chance). 899 continue; 900 default: 901 llvm_unreachable("Invalid dependence flavor"); 902 } 903 904 if (DependingInstructions.size() == 1 && 905 *DependingInstructions.begin() == PN) { 906 Changed = true; 907 ++NumPartialNoops; 908 // Clone the call into each predecessor that has a non-null value. 909 CallInst *CInst = cast<CallInst>(Inst); 910 Type *ParamTy = CInst->getArgOperand(0)->getType(); 911 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 912 Value *Incoming = 913 GetRCIdentityRoot(PN->getIncomingValue(i)); 914 if (!IsNullOrUndef(Incoming)) { 915 CallInst *Clone = cast<CallInst>(CInst->clone()); 916 Value *Op = PN->getIncomingValue(i); 917 Instruction *InsertPos = &PN->getIncomingBlock(i)->back(); 918 if (Op->getType() != ParamTy) 919 Op = new BitCastInst(Op, ParamTy, "", InsertPos); 920 Clone->setArgOperand(0, Op); 921 Clone->insertBefore(InsertPos); 922 923 DEBUG(dbgs() << "Cloning " 924 << *CInst << "\n" 925 "And inserting clone at " << *InsertPos << "\n"); 926 Worklist.push_back(std::make_pair(Clone, Incoming)); 927 } 928 } 929 // Erase the original call. 930 DEBUG(dbgs() << "Erasing: " << *CInst << "\n"); 931 EraseInstruction(CInst); 932 continue; 933 } 934 } 935 } while (!Worklist.empty()); 936 } 937 } 938 939 /// If we have a top down pointer in the S_Use state, make sure that there are 940 /// no CFG hazards by checking the states of various bottom up pointers. 941 static void CheckForUseCFGHazard(const Sequence SuccSSeq, 942 const bool SuccSRRIKnownSafe, 943 TopDownPtrState &S, 944 bool &SomeSuccHasSame, 945 bool &AllSuccsHaveSame, 946 bool &NotAllSeqEqualButKnownSafe, 947 bool &ShouldContinue) { 948 switch (SuccSSeq) { 949 case S_CanRelease: { 950 if (!S.IsKnownSafe() && !SuccSRRIKnownSafe) { 951 S.ClearSequenceProgress(); 952 break; 953 } 954 S.SetCFGHazardAfflicted(true); 955 ShouldContinue = true; 956 break; 957 } 958 case S_Use: 959 SomeSuccHasSame = true; 960 break; 961 case S_Stop: 962 case S_Release: 963 case S_MovableRelease: 964 if (!S.IsKnownSafe() && !SuccSRRIKnownSafe) 965 AllSuccsHaveSame = false; 966 else 967 NotAllSeqEqualButKnownSafe = true; 968 break; 969 case S_Retain: 970 llvm_unreachable("bottom-up pointer in retain state!"); 971 case S_None: 972 llvm_unreachable("This should have been handled earlier."); 973 } 974 } 975 976 /// If we have a Top Down pointer in the S_CanRelease state, make sure that 977 /// there are no CFG hazards by checking the states of various bottom up 978 /// pointers. 979 static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq, 980 const bool SuccSRRIKnownSafe, 981 TopDownPtrState &S, 982 bool &SomeSuccHasSame, 983 bool &AllSuccsHaveSame, 984 bool &NotAllSeqEqualButKnownSafe) { 985 switch (SuccSSeq) { 986 case S_CanRelease: 987 SomeSuccHasSame = true; 988 break; 989 case S_Stop: 990 case S_Release: 991 case S_MovableRelease: 992 case S_Use: 993 if (!S.IsKnownSafe() && !SuccSRRIKnownSafe) 994 AllSuccsHaveSame = false; 995 else 996 NotAllSeqEqualButKnownSafe = true; 997 break; 998 case S_Retain: 999 llvm_unreachable("bottom-up pointer in retain state!"); 1000 case S_None: 1001 llvm_unreachable("This should have been handled earlier."); 1002 } 1003 } 1004 1005 /// Check for critical edges, loop boundaries, irreducible control flow, or 1006 /// other CFG structures where moving code across the edge would result in it 1007 /// being executed more. 1008 void 1009 ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, 1010 DenseMap<const BasicBlock *, BBState> &BBStates, 1011 BBState &MyStates) const { 1012 // If any top-down local-use or possible-dec has a succ which is earlier in 1013 // the sequence, forget it. 1014 for (auto I = MyStates.top_down_ptr_begin(), E = MyStates.top_down_ptr_end(); 1015 I != E; ++I) { 1016 TopDownPtrState &S = I->second; 1017 const Sequence Seq = I->second.GetSeq(); 1018 1019 // We only care about S_Retain, S_CanRelease, and S_Use. 1020 if (Seq == S_None) 1021 continue; 1022 1023 // Make sure that if extra top down states are added in the future that this 1024 // code is updated to handle it. 1025 assert((Seq == S_Retain || Seq == S_CanRelease || Seq == S_Use) && 1026 "Unknown top down sequence state."); 1027 1028 const Value *Arg = I->first; 1029 const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); 1030 bool SomeSuccHasSame = false; 1031 bool AllSuccsHaveSame = true; 1032 bool NotAllSeqEqualButKnownSafe = false; 1033 1034 succ_const_iterator SI(TI), SE(TI, false); 1035 1036 for (; SI != SE; ++SI) { 1037 // If VisitBottomUp has pointer information for this successor, take 1038 // what we know about it. 1039 const DenseMap<const BasicBlock *, BBState>::iterator BBI = 1040 BBStates.find(*SI); 1041 assert(BBI != BBStates.end()); 1042 const BottomUpPtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); 1043 const Sequence SuccSSeq = SuccS.GetSeq(); 1044 1045 // If bottom up, the pointer is in an S_None state, clear the sequence 1046 // progress since the sequence in the bottom up state finished 1047 // suggesting a mismatch in between retains/releases. This is true for 1048 // all three cases that we are handling here: S_Retain, S_Use, and 1049 // S_CanRelease. 1050 if (SuccSSeq == S_None) { 1051 S.ClearSequenceProgress(); 1052 continue; 1053 } 1054 1055 // If we have S_Use or S_CanRelease, perform our check for cfg hazard 1056 // checks. 1057 const bool SuccSRRIKnownSafe = SuccS.IsKnownSafe(); 1058 1059 // *NOTE* We do not use Seq from above here since we are allowing for 1060 // S.GetSeq() to change while we are visiting basic blocks. 1061 switch(S.GetSeq()) { 1062 case S_Use: { 1063 bool ShouldContinue = false; 1064 CheckForUseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S, SomeSuccHasSame, 1065 AllSuccsHaveSame, NotAllSeqEqualButKnownSafe, 1066 ShouldContinue); 1067 if (ShouldContinue) 1068 continue; 1069 break; 1070 } 1071 case S_CanRelease: { 1072 CheckForCanReleaseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S, 1073 SomeSuccHasSame, AllSuccsHaveSame, 1074 NotAllSeqEqualButKnownSafe); 1075 break; 1076 } 1077 case S_Retain: 1078 case S_None: 1079 case S_Stop: 1080 case S_Release: 1081 case S_MovableRelease: 1082 break; 1083 } 1084 } 1085 1086 // If the state at the other end of any of the successor edges 1087 // matches the current state, require all edges to match. This 1088 // guards against loops in the middle of a sequence. 1089 if (SomeSuccHasSame && !AllSuccsHaveSame) { 1090 S.ClearSequenceProgress(); 1091 } else if (NotAllSeqEqualButKnownSafe) { 1092 // If we would have cleared the state foregoing the fact that we are known 1093 // safe, stop code motion. This is because whether or not it is safe to 1094 // remove RR pairs via KnownSafe is an orthogonal concept to whether we 1095 // are allowed to perform code motion. 1096 S.SetCFGHazardAfflicted(true); 1097 } 1098 } 1099 } 1100 1101 bool ObjCARCOpt::VisitInstructionBottomUp( 1102 Instruction *Inst, BasicBlock *BB, BlotMapVector<Value *, RRInfo> &Retains, 1103 BBState &MyStates) { 1104 bool NestingDetected = false; 1105 ARCInstKind Class = GetARCInstKind(Inst); 1106 const Value *Arg = nullptr; 1107 1108 DEBUG(dbgs() << " Class: " << Class << "\n"); 1109 1110 switch (Class) { 1111 case ARCInstKind::Release: { 1112 Arg = GetArgRCIdentityRoot(Inst); 1113 1114 BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg); 1115 NestingDetected |= S.InitBottomUp(MDKindCache, Inst); 1116 break; 1117 } 1118 case ARCInstKind::RetainBlock: 1119 // In OptimizeIndividualCalls, we have strength reduced all optimizable 1120 // objc_retainBlocks to objc_retains. Thus at this point any 1121 // objc_retainBlocks that we see are not optimizable. 1122 break; 1123 case ARCInstKind::Retain: 1124 case ARCInstKind::RetainRV: { 1125 Arg = GetArgRCIdentityRoot(Inst); 1126 BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg); 1127 if (S.MatchWithRetain()) { 1128 // Don't do retain+release tracking for ARCInstKind::RetainRV, because 1129 // it's better to let it remain as the first instruction after a call. 1130 if (Class != ARCInstKind::RetainRV) { 1131 DEBUG(llvm::dbgs() << " Matching with: " << *Inst << "\n"); 1132 Retains[Inst] = S.GetRRInfo(); 1133 } 1134 S.ClearSequenceProgress(); 1135 } 1136 // A retain moving bottom up can be a use. 1137 break; 1138 } 1139 case ARCInstKind::AutoreleasepoolPop: 1140 // Conservatively, clear MyStates for all known pointers. 1141 MyStates.clearBottomUpPointers(); 1142 return NestingDetected; 1143 case ARCInstKind::AutoreleasepoolPush: 1144 case ARCInstKind::None: 1145 // These are irrelevant. 1146 return NestingDetected; 1147 case ARCInstKind::User: 1148 // If we have a store into an alloca of a pointer we are tracking, the 1149 // pointer has multiple owners implying that we must be more conservative. 1150 // 1151 // This comes up in the context of a pointer being ``KnownSafe''. In the 1152 // presence of a block being initialized, the frontend will emit the 1153 // objc_retain on the original pointer and the release on the pointer loaded 1154 // from the alloca. The optimizer will through the provenance analysis 1155 // realize that the two are related, but since we only require KnownSafe in 1156 // one direction, will match the inner retain on the original pointer with 1157 // the guard release on the original pointer. This is fixed by ensuring that 1158 // in the presence of allocas we only unconditionally remove pointers if 1159 // both our retain and our release are KnownSafe. 1160 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { 1161 const DataLayout &DL = BB->getModule()->getDataLayout(); 1162 if (AreAnyUnderlyingObjectsAnAlloca(SI->getPointerOperand(), DL)) { 1163 auto I = MyStates.findPtrBottomUpState( 1164 GetRCIdentityRoot(SI->getValueOperand())); 1165 if (I != MyStates.bottom_up_ptr_end()) 1166 MultiOwnersSet.insert(I->first); 1167 } 1168 } 1169 break; 1170 default: 1171 break; 1172 } 1173 1174 // Consider any other possible effects of this instruction on each 1175 // pointer being tracked. 1176 for (auto MI = MyStates.bottom_up_ptr_begin(), 1177 ME = MyStates.bottom_up_ptr_end(); 1178 MI != ME; ++MI) { 1179 const Value *Ptr = MI->first; 1180 if (Ptr == Arg) 1181 continue; // Handled above. 1182 BottomUpPtrState &S = MI->second; 1183 1184 if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class)) 1185 continue; 1186 1187 S.HandlePotentialUse(BB, Inst, Ptr, PA, Class); 1188 } 1189 1190 return NestingDetected; 1191 } 1192 1193 bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB, 1194 DenseMap<const BasicBlock *, BBState> &BBStates, 1195 BlotMapVector<Value *, RRInfo> &Retains) { 1196 1197 DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n"); 1198 1199 bool NestingDetected = false; 1200 BBState &MyStates = BBStates[BB]; 1201 1202 // Merge the states from each successor to compute the initial state 1203 // for the current block. 1204 BBState::edge_iterator SI(MyStates.succ_begin()), 1205 SE(MyStates.succ_end()); 1206 if (SI != SE) { 1207 const BasicBlock *Succ = *SI; 1208 DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ); 1209 assert(I != BBStates.end()); 1210 MyStates.InitFromSucc(I->second); 1211 ++SI; 1212 for (; SI != SE; ++SI) { 1213 Succ = *SI; 1214 I = BBStates.find(Succ); 1215 assert(I != BBStates.end()); 1216 MyStates.MergeSucc(I->second); 1217 } 1218 } 1219 1220 DEBUG(llvm::dbgs() << "Before:\n" << BBStates[BB] << "\n" 1221 << "Performing Dataflow:\n"); 1222 1223 // Visit all the instructions, bottom-up. 1224 for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) { 1225 Instruction *Inst = &*std::prev(I); 1226 1227 // Invoke instructions are visited as part of their successors (below). 1228 if (isa<InvokeInst>(Inst)) 1229 continue; 1230 1231 DEBUG(dbgs() << " Visiting " << *Inst << "\n"); 1232 1233 NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates); 1234 } 1235 1236 // If there's a predecessor with an invoke, visit the invoke as if it were 1237 // part of this block, since we can't insert code after an invoke in its own 1238 // block, and we don't want to split critical edges. 1239 for (BBState::edge_iterator PI(MyStates.pred_begin()), 1240 PE(MyStates.pred_end()); PI != PE; ++PI) { 1241 BasicBlock *Pred = *PI; 1242 if (InvokeInst *II = dyn_cast<InvokeInst>(&Pred->back())) 1243 NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates); 1244 } 1245 1246 DEBUG(llvm::dbgs() << "\nFinal State:\n" << BBStates[BB] << "\n"); 1247 1248 return NestingDetected; 1249 } 1250 1251 bool 1252 ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, 1253 DenseMap<Value *, RRInfo> &Releases, 1254 BBState &MyStates) { 1255 bool NestingDetected = false; 1256 ARCInstKind Class = GetARCInstKind(Inst); 1257 const Value *Arg = nullptr; 1258 1259 DEBUG(llvm::dbgs() << " Class: " << Class << "\n"); 1260 1261 switch (Class) { 1262 case ARCInstKind::RetainBlock: 1263 // In OptimizeIndividualCalls, we have strength reduced all optimizable 1264 // objc_retainBlocks to objc_retains. Thus at this point any 1265 // objc_retainBlocks that we see are not optimizable. We need to break since 1266 // a retain can be a potential use. 1267 break; 1268 case ARCInstKind::Retain: 1269 case ARCInstKind::RetainRV: { 1270 Arg = GetArgRCIdentityRoot(Inst); 1271 TopDownPtrState &S = MyStates.getPtrTopDownState(Arg); 1272 NestingDetected |= S.InitTopDown(Class, Inst); 1273 // A retain can be a potential use; proceed to the generic checking 1274 // code below. 1275 break; 1276 } 1277 case ARCInstKind::Release: { 1278 Arg = GetArgRCIdentityRoot(Inst); 1279 TopDownPtrState &S = MyStates.getPtrTopDownState(Arg); 1280 // Try to form a tentative pair in between this release instruction and the 1281 // top down pointers that we are tracking. 1282 if (S.MatchWithRelease(MDKindCache, Inst)) { 1283 // If we succeed, copy S's RRInfo into the Release -> {Retain Set 1284 // Map}. Then we clear S. 1285 DEBUG(llvm::dbgs() << " Matching with: " << *Inst << "\n"); 1286 Releases[Inst] = S.GetRRInfo(); 1287 S.ClearSequenceProgress(); 1288 } 1289 break; 1290 } 1291 case ARCInstKind::AutoreleasepoolPop: 1292 // Conservatively, clear MyStates for all known pointers. 1293 MyStates.clearTopDownPointers(); 1294 return false; 1295 case ARCInstKind::AutoreleasepoolPush: 1296 case ARCInstKind::None: 1297 // These can not be uses of 1298 return false; 1299 default: 1300 break; 1301 } 1302 1303 // Consider any other possible effects of this instruction on each 1304 // pointer being tracked. 1305 for (auto MI = MyStates.top_down_ptr_begin(), 1306 ME = MyStates.top_down_ptr_end(); 1307 MI != ME; ++MI) { 1308 const Value *Ptr = MI->first; 1309 if (Ptr == Arg) 1310 continue; // Handled above. 1311 TopDownPtrState &S = MI->second; 1312 if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class)) 1313 continue; 1314 1315 S.HandlePotentialUse(Inst, Ptr, PA, Class); 1316 } 1317 1318 return NestingDetected; 1319 } 1320 1321 bool 1322 ObjCARCOpt::VisitTopDown(BasicBlock *BB, 1323 DenseMap<const BasicBlock *, BBState> &BBStates, 1324 DenseMap<Value *, RRInfo> &Releases) { 1325 DEBUG(dbgs() << "\n== ObjCARCOpt::VisitTopDown ==\n"); 1326 bool NestingDetected = false; 1327 BBState &MyStates = BBStates[BB]; 1328 1329 // Merge the states from each predecessor to compute the initial state 1330 // for the current block. 1331 BBState::edge_iterator PI(MyStates.pred_begin()), 1332 PE(MyStates.pred_end()); 1333 if (PI != PE) { 1334 const BasicBlock *Pred = *PI; 1335 DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred); 1336 assert(I != BBStates.end()); 1337 MyStates.InitFromPred(I->second); 1338 ++PI; 1339 for (; PI != PE; ++PI) { 1340 Pred = *PI; 1341 I = BBStates.find(Pred); 1342 assert(I != BBStates.end()); 1343 MyStates.MergePred(I->second); 1344 } 1345 } 1346 1347 DEBUG(llvm::dbgs() << "Before:\n" << BBStates[BB] << "\n" 1348 << "Performing Dataflow:\n"); 1349 1350 // Visit all the instructions, top-down. 1351 for (Instruction &Inst : *BB) { 1352 DEBUG(dbgs() << " Visiting " << Inst << "\n"); 1353 1354 NestingDetected |= VisitInstructionTopDown(&Inst, Releases, MyStates); 1355 } 1356 1357 DEBUG(llvm::dbgs() << "\nState Before Checking for CFG Hazards:\n" 1358 << BBStates[BB] << "\n\n"); 1359 CheckForCFGHazards(BB, BBStates, MyStates); 1360 DEBUG(llvm::dbgs() << "Final State:\n" << BBStates[BB] << "\n"); 1361 return NestingDetected; 1362 } 1363 1364 static void 1365 ComputePostOrders(Function &F, 1366 SmallVectorImpl<BasicBlock *> &PostOrder, 1367 SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder, 1368 unsigned NoObjCARCExceptionsMDKind, 1369 DenseMap<const BasicBlock *, BBState> &BBStates) { 1370 /// The visited set, for doing DFS walks. 1371 SmallPtrSet<BasicBlock *, 16> Visited; 1372 1373 // Do DFS, computing the PostOrder. 1374 SmallPtrSet<BasicBlock *, 16> OnStack; 1375 SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack; 1376 1377 // Functions always have exactly one entry block, and we don't have 1378 // any other block that we treat like an entry block. 1379 BasicBlock *EntryBB = &F.getEntryBlock(); 1380 BBState &MyStates = BBStates[EntryBB]; 1381 MyStates.SetAsEntry(); 1382 TerminatorInst *EntryTI = cast<TerminatorInst>(&EntryBB->back()); 1383 SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI))); 1384 Visited.insert(EntryBB); 1385 OnStack.insert(EntryBB); 1386 do { 1387 dfs_next_succ: 1388 BasicBlock *CurrBB = SuccStack.back().first; 1389 TerminatorInst *TI = cast<TerminatorInst>(&CurrBB->back()); 1390 succ_iterator SE(TI, false); 1391 1392 while (SuccStack.back().second != SE) { 1393 BasicBlock *SuccBB = *SuccStack.back().second++; 1394 if (Visited.insert(SuccBB).second) { 1395 TerminatorInst *TI = cast<TerminatorInst>(&SuccBB->back()); 1396 SuccStack.push_back(std::make_pair(SuccBB, succ_iterator(TI))); 1397 BBStates[CurrBB].addSucc(SuccBB); 1398 BBState &SuccStates = BBStates[SuccBB]; 1399 SuccStates.addPred(CurrBB); 1400 OnStack.insert(SuccBB); 1401 goto dfs_next_succ; 1402 } 1403 1404 if (!OnStack.count(SuccBB)) { 1405 BBStates[CurrBB].addSucc(SuccBB); 1406 BBStates[SuccBB].addPred(CurrBB); 1407 } 1408 } 1409 OnStack.erase(CurrBB); 1410 PostOrder.push_back(CurrBB); 1411 SuccStack.pop_back(); 1412 } while (!SuccStack.empty()); 1413 1414 Visited.clear(); 1415 1416 // Do reverse-CFG DFS, computing the reverse-CFG PostOrder. 1417 // Functions may have many exits, and there also blocks which we treat 1418 // as exits due to ignored edges. 1419 SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack; 1420 for (BasicBlock &ExitBB : F) { 1421 BBState &MyStates = BBStates[&ExitBB]; 1422 if (!MyStates.isExit()) 1423 continue; 1424 1425 MyStates.SetAsExit(); 1426 1427 PredStack.push_back(std::make_pair(&ExitBB, MyStates.pred_begin())); 1428 Visited.insert(&ExitBB); 1429 while (!PredStack.empty()) { 1430 reverse_dfs_next_succ: 1431 BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end(); 1432 while (PredStack.back().second != PE) { 1433 BasicBlock *BB = *PredStack.back().second++; 1434 if (Visited.insert(BB).second) { 1435 PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin())); 1436 goto reverse_dfs_next_succ; 1437 } 1438 } 1439 ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first); 1440 } 1441 } 1442 } 1443 1444 // Visit the function both top-down and bottom-up. 1445 bool ObjCARCOpt::Visit(Function &F, 1446 DenseMap<const BasicBlock *, BBState> &BBStates, 1447 BlotMapVector<Value *, RRInfo> &Retains, 1448 DenseMap<Value *, RRInfo> &Releases) { 1449 1450 // Use reverse-postorder traversals, because we magically know that loops 1451 // will be well behaved, i.e. they won't repeatedly call retain on a single 1452 // pointer without doing a release. We can't use the ReversePostOrderTraversal 1453 // class here because we want the reverse-CFG postorder to consider each 1454 // function exit point, and we want to ignore selected cycle edges. 1455 SmallVector<BasicBlock *, 16> PostOrder; 1456 SmallVector<BasicBlock *, 16> ReverseCFGPostOrder; 1457 ComputePostOrders(F, PostOrder, ReverseCFGPostOrder, 1458 MDKindCache.get(ARCMDKindID::NoObjCARCExceptions), 1459 BBStates); 1460 1461 // Use reverse-postorder on the reverse CFG for bottom-up. 1462 bool BottomUpNestingDetected = false; 1463 for (BasicBlock *BB : reverse(ReverseCFGPostOrder)) 1464 BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains); 1465 1466 // Use reverse-postorder for top-down. 1467 bool TopDownNestingDetected = false; 1468 for (BasicBlock *BB : reverse(PostOrder)) 1469 TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases); 1470 1471 return TopDownNestingDetected && BottomUpNestingDetected; 1472 } 1473 1474 /// Move the calls in RetainsToMove and ReleasesToMove. 1475 void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &RetainsToMove, 1476 RRInfo &ReleasesToMove, 1477 BlotMapVector<Value *, RRInfo> &Retains, 1478 DenseMap<Value *, RRInfo> &Releases, 1479 SmallVectorImpl<Instruction *> &DeadInsts, 1480 Module *M) { 1481 Type *ArgTy = Arg->getType(); 1482 Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext())); 1483 1484 DEBUG(dbgs() << "== ObjCARCOpt::MoveCalls ==\n"); 1485 1486 // Insert the new retain and release calls. 1487 for (Instruction *InsertPt : ReleasesToMove.ReverseInsertPts) { 1488 Value *MyArg = ArgTy == ParamTy ? Arg : 1489 new BitCastInst(Arg, ParamTy, "", InsertPt); 1490 Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain); 1491 CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt); 1492 Call->setDoesNotThrow(); 1493 Call->setTailCall(); 1494 1495 DEBUG(dbgs() << "Inserting new Retain: " << *Call << "\n" 1496 "At insertion point: " << *InsertPt << "\n"); 1497 } 1498 for (Instruction *InsertPt : RetainsToMove.ReverseInsertPts) { 1499 Value *MyArg = ArgTy == ParamTy ? Arg : 1500 new BitCastInst(Arg, ParamTy, "", InsertPt); 1501 Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Release); 1502 CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt); 1503 // Attach a clang.imprecise_release metadata tag, if appropriate. 1504 if (MDNode *M = ReleasesToMove.ReleaseMetadata) 1505 Call->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease), M); 1506 Call->setDoesNotThrow(); 1507 if (ReleasesToMove.IsTailCallRelease) 1508 Call->setTailCall(); 1509 1510 DEBUG(dbgs() << "Inserting new Release: " << *Call << "\n" 1511 "At insertion point: " << *InsertPt << "\n"); 1512 } 1513 1514 // Delete the original retain and release calls. 1515 for (Instruction *OrigRetain : RetainsToMove.Calls) { 1516 Retains.blot(OrigRetain); 1517 DeadInsts.push_back(OrigRetain); 1518 DEBUG(dbgs() << "Deleting retain: " << *OrigRetain << "\n"); 1519 } 1520 for (Instruction *OrigRelease : ReleasesToMove.Calls) { 1521 Releases.erase(OrigRelease); 1522 DeadInsts.push_back(OrigRelease); 1523 DEBUG(dbgs() << "Deleting release: " << *OrigRelease << "\n"); 1524 } 1525 1526 } 1527 1528 bool ObjCARCOpt::PairUpRetainsAndReleases( 1529 DenseMap<const BasicBlock *, BBState> &BBStates, 1530 BlotMapVector<Value *, RRInfo> &Retains, 1531 DenseMap<Value *, RRInfo> &Releases, Module *M, 1532 SmallVectorImpl<Instruction *> &NewRetains, 1533 SmallVectorImpl<Instruction *> &NewReleases, 1534 SmallVectorImpl<Instruction *> &DeadInsts, RRInfo &RetainsToMove, 1535 RRInfo &ReleasesToMove, Value *Arg, bool KnownSafe, 1536 bool &AnyPairsCompletelyEliminated) { 1537 // If a pair happens in a region where it is known that the reference count 1538 // is already incremented, we can similarly ignore possible decrements unless 1539 // we are dealing with a retainable object with multiple provenance sources. 1540 bool KnownSafeTD = true, KnownSafeBU = true; 1541 bool MultipleOwners = false; 1542 bool CFGHazardAfflicted = false; 1543 1544 // Connect the dots between the top-down-collected RetainsToMove and 1545 // bottom-up-collected ReleasesToMove to form sets of related calls. 1546 // This is an iterative process so that we connect multiple releases 1547 // to multiple retains if needed. 1548 unsigned OldDelta = 0; 1549 unsigned NewDelta = 0; 1550 unsigned OldCount = 0; 1551 unsigned NewCount = 0; 1552 bool FirstRelease = true; 1553 for (;;) { 1554 for (Instruction *NewRetain : NewRetains) { 1555 auto It = Retains.find(NewRetain); 1556 assert(It != Retains.end()); 1557 const RRInfo &NewRetainRRI = It->second; 1558 KnownSafeTD &= NewRetainRRI.KnownSafe; 1559 MultipleOwners = 1560 MultipleOwners || MultiOwnersSet.count(GetArgRCIdentityRoot(NewRetain)); 1561 for (Instruction *NewRetainRelease : NewRetainRRI.Calls) { 1562 auto Jt = Releases.find(NewRetainRelease); 1563 if (Jt == Releases.end()) 1564 return false; 1565 const RRInfo &NewRetainReleaseRRI = Jt->second; 1566 1567 // If the release does not have a reference to the retain as well, 1568 // something happened which is unaccounted for. Do not do anything. 1569 // 1570 // This can happen if we catch an additive overflow during path count 1571 // merging. 1572 if (!NewRetainReleaseRRI.Calls.count(NewRetain)) 1573 return false; 1574 1575 if (ReleasesToMove.Calls.insert(NewRetainRelease).second) { 1576 1577 // If we overflow when we compute the path count, don't remove/move 1578 // anything. 1579 const BBState &NRRBBState = BBStates[NewRetainRelease->getParent()]; 1580 unsigned PathCount = BBState::OverflowOccurredValue; 1581 if (NRRBBState.GetAllPathCountWithOverflow(PathCount)) 1582 return false; 1583 assert(PathCount != BBState::OverflowOccurredValue && 1584 "PathCount at this point can not be " 1585 "OverflowOccurredValue."); 1586 OldDelta -= PathCount; 1587 1588 // Merge the ReleaseMetadata and IsTailCallRelease values. 1589 if (FirstRelease) { 1590 ReleasesToMove.ReleaseMetadata = 1591 NewRetainReleaseRRI.ReleaseMetadata; 1592 ReleasesToMove.IsTailCallRelease = 1593 NewRetainReleaseRRI.IsTailCallRelease; 1594 FirstRelease = false; 1595 } else { 1596 if (ReleasesToMove.ReleaseMetadata != 1597 NewRetainReleaseRRI.ReleaseMetadata) 1598 ReleasesToMove.ReleaseMetadata = nullptr; 1599 if (ReleasesToMove.IsTailCallRelease != 1600 NewRetainReleaseRRI.IsTailCallRelease) 1601 ReleasesToMove.IsTailCallRelease = false; 1602 } 1603 1604 // Collect the optimal insertion points. 1605 if (!KnownSafe) 1606 for (Instruction *RIP : NewRetainReleaseRRI.ReverseInsertPts) { 1607 if (ReleasesToMove.ReverseInsertPts.insert(RIP).second) { 1608 // If we overflow when we compute the path count, don't 1609 // remove/move anything. 1610 const BBState &RIPBBState = BBStates[RIP->getParent()]; 1611 PathCount = BBState::OverflowOccurredValue; 1612 if (RIPBBState.GetAllPathCountWithOverflow(PathCount)) 1613 return false; 1614 assert(PathCount != BBState::OverflowOccurredValue && 1615 "PathCount at this point can not be " 1616 "OverflowOccurredValue."); 1617 NewDelta -= PathCount; 1618 } 1619 } 1620 NewReleases.push_back(NewRetainRelease); 1621 } 1622 } 1623 } 1624 NewRetains.clear(); 1625 if (NewReleases.empty()) break; 1626 1627 // Back the other way. 1628 for (Instruction *NewRelease : NewReleases) { 1629 auto It = Releases.find(NewRelease); 1630 assert(It != Releases.end()); 1631 const RRInfo &NewReleaseRRI = It->second; 1632 KnownSafeBU &= NewReleaseRRI.KnownSafe; 1633 CFGHazardAfflicted |= NewReleaseRRI.CFGHazardAfflicted; 1634 for (Instruction *NewReleaseRetain : NewReleaseRRI.Calls) { 1635 auto Jt = Retains.find(NewReleaseRetain); 1636 if (Jt == Retains.end()) 1637 return false; 1638 const RRInfo &NewReleaseRetainRRI = Jt->second; 1639 1640 // If the retain does not have a reference to the release as well, 1641 // something happened which is unaccounted for. Do not do anything. 1642 // 1643 // This can happen if we catch an additive overflow during path count 1644 // merging. 1645 if (!NewReleaseRetainRRI.Calls.count(NewRelease)) 1646 return false; 1647 1648 if (RetainsToMove.Calls.insert(NewReleaseRetain).second) { 1649 // If we overflow when we compute the path count, don't remove/move 1650 // anything. 1651 const BBState &NRRBBState = BBStates[NewReleaseRetain->getParent()]; 1652 unsigned PathCount = BBState::OverflowOccurredValue; 1653 if (NRRBBState.GetAllPathCountWithOverflow(PathCount)) 1654 return false; 1655 assert(PathCount != BBState::OverflowOccurredValue && 1656 "PathCount at this point can not be " 1657 "OverflowOccurredValue."); 1658 OldDelta += PathCount; 1659 OldCount += PathCount; 1660 1661 // Collect the optimal insertion points. 1662 if (!KnownSafe) 1663 for (Instruction *RIP : NewReleaseRetainRRI.ReverseInsertPts) { 1664 if (RetainsToMove.ReverseInsertPts.insert(RIP).second) { 1665 // If we overflow when we compute the path count, don't 1666 // remove/move anything. 1667 const BBState &RIPBBState = BBStates[RIP->getParent()]; 1668 1669 PathCount = BBState::OverflowOccurredValue; 1670 if (RIPBBState.GetAllPathCountWithOverflow(PathCount)) 1671 return false; 1672 assert(PathCount != BBState::OverflowOccurredValue && 1673 "PathCount at this point can not be " 1674 "OverflowOccurredValue."); 1675 NewDelta += PathCount; 1676 NewCount += PathCount; 1677 } 1678 } 1679 NewRetains.push_back(NewReleaseRetain); 1680 } 1681 } 1682 } 1683 NewReleases.clear(); 1684 if (NewRetains.empty()) break; 1685 } 1686 1687 // We can only remove pointers if we are known safe in both directions. 1688 bool UnconditionallySafe = KnownSafeTD && KnownSafeBU; 1689 if (UnconditionallySafe) { 1690 RetainsToMove.ReverseInsertPts.clear(); 1691 ReleasesToMove.ReverseInsertPts.clear(); 1692 NewCount = 0; 1693 } else { 1694 // Determine whether the new insertion points we computed preserve the 1695 // balance of retain and release calls through the program. 1696 // TODO: If the fully aggressive solution isn't valid, try to find a 1697 // less aggressive solution which is. 1698 if (NewDelta != 0) 1699 return false; 1700 1701 // At this point, we are not going to remove any RR pairs, but we still are 1702 // able to move RR pairs. If one of our pointers is afflicted with 1703 // CFGHazards, we cannot perform such code motion so exit early. 1704 const bool WillPerformCodeMotion = RetainsToMove.ReverseInsertPts.size() || 1705 ReleasesToMove.ReverseInsertPts.size(); 1706 if (CFGHazardAfflicted && WillPerformCodeMotion) 1707 return false; 1708 } 1709 1710 // Determine whether the original call points are balanced in the retain and 1711 // release calls through the program. If not, conservatively don't touch 1712 // them. 1713 // TODO: It's theoretically possible to do code motion in this case, as 1714 // long as the existing imbalances are maintained. 1715 if (OldDelta != 0) 1716 return false; 1717 1718 Changed = true; 1719 assert(OldCount != 0 && "Unreachable code?"); 1720 NumRRs += OldCount - NewCount; 1721 // Set to true if we completely removed any RR pairs. 1722 AnyPairsCompletelyEliminated = NewCount == 0; 1723 1724 // We can move calls! 1725 return true; 1726 } 1727 1728 /// Identify pairings between the retains and releases, and delete and/or move 1729 /// them. 1730 bool ObjCARCOpt::PerformCodePlacement( 1731 DenseMap<const BasicBlock *, BBState> &BBStates, 1732 BlotMapVector<Value *, RRInfo> &Retains, 1733 DenseMap<Value *, RRInfo> &Releases, Module *M) { 1734 DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n"); 1735 1736 bool AnyPairsCompletelyEliminated = false; 1737 RRInfo RetainsToMove; 1738 RRInfo ReleasesToMove; 1739 SmallVector<Instruction *, 4> NewRetains; 1740 SmallVector<Instruction *, 4> NewReleases; 1741 SmallVector<Instruction *, 8> DeadInsts; 1742 1743 // Visit each retain. 1744 for (BlotMapVector<Value *, RRInfo>::const_iterator I = Retains.begin(), 1745 E = Retains.end(); 1746 I != E; ++I) { 1747 Value *V = I->first; 1748 if (!V) continue; // blotted 1749 1750 Instruction *Retain = cast<Instruction>(V); 1751 1752 DEBUG(dbgs() << "Visiting: " << *Retain << "\n"); 1753 1754 Value *Arg = GetArgRCIdentityRoot(Retain); 1755 1756 // If the object being released is in static or stack storage, we know it's 1757 // not being managed by ObjC reference counting, so we can delete pairs 1758 // regardless of what possible decrements or uses lie between them. 1759 bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg); 1760 1761 // A constant pointer can't be pointing to an object on the heap. It may 1762 // be reference-counted, but it won't be deleted. 1763 if (const LoadInst *LI = dyn_cast<LoadInst>(Arg)) 1764 if (const GlobalVariable *GV = 1765 dyn_cast<GlobalVariable>( 1766 GetRCIdentityRoot(LI->getPointerOperand()))) 1767 if (GV->isConstant()) 1768 KnownSafe = true; 1769 1770 // Connect the dots between the top-down-collected RetainsToMove and 1771 // bottom-up-collected ReleasesToMove to form sets of related calls. 1772 NewRetains.push_back(Retain); 1773 bool PerformMoveCalls = PairUpRetainsAndReleases( 1774 BBStates, Retains, Releases, M, NewRetains, NewReleases, DeadInsts, 1775 RetainsToMove, ReleasesToMove, Arg, KnownSafe, 1776 AnyPairsCompletelyEliminated); 1777 1778 if (PerformMoveCalls) { 1779 // Ok, everything checks out and we're all set. Let's move/delete some 1780 // code! 1781 MoveCalls(Arg, RetainsToMove, ReleasesToMove, 1782 Retains, Releases, DeadInsts, M); 1783 } 1784 1785 // Clean up state for next retain. 1786 NewReleases.clear(); 1787 NewRetains.clear(); 1788 RetainsToMove.clear(); 1789 ReleasesToMove.clear(); 1790 } 1791 1792 // Now that we're done moving everything, we can delete the newly dead 1793 // instructions, as we no longer need them as insert points. 1794 while (!DeadInsts.empty()) 1795 EraseInstruction(DeadInsts.pop_back_val()); 1796 1797 return AnyPairsCompletelyEliminated; 1798 } 1799 1800 /// Weak pointer optimizations. 1801 void ObjCARCOpt::OptimizeWeakCalls(Function &F) { 1802 DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeWeakCalls ==\n"); 1803 1804 // First, do memdep-style RLE and S2L optimizations. We can't use memdep 1805 // itself because it uses AliasAnalysis and we need to do provenance 1806 // queries instead. 1807 for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { 1808 Instruction *Inst = &*I++; 1809 1810 DEBUG(dbgs() << "Visiting: " << *Inst << "\n"); 1811 1812 ARCInstKind Class = GetBasicARCInstKind(Inst); 1813 if (Class != ARCInstKind::LoadWeak && 1814 Class != ARCInstKind::LoadWeakRetained) 1815 continue; 1816 1817 // Delete objc_loadWeak calls with no users. 1818 if (Class == ARCInstKind::LoadWeak && Inst->use_empty()) { 1819 Inst->eraseFromParent(); 1820 continue; 1821 } 1822 1823 // TODO: For now, just look for an earlier available version of this value 1824 // within the same block. Theoretically, we could do memdep-style non-local 1825 // analysis too, but that would want caching. A better approach would be to 1826 // use the technique that EarlyCSE uses. 1827 inst_iterator Current = std::prev(I); 1828 BasicBlock *CurrentBB = &*Current.getBasicBlockIterator(); 1829 for (BasicBlock::iterator B = CurrentBB->begin(), 1830 J = Current.getInstructionIterator(); 1831 J != B; --J) { 1832 Instruction *EarlierInst = &*std::prev(J); 1833 ARCInstKind EarlierClass = GetARCInstKind(EarlierInst); 1834 switch (EarlierClass) { 1835 case ARCInstKind::LoadWeak: 1836 case ARCInstKind::LoadWeakRetained: { 1837 // If this is loading from the same pointer, replace this load's value 1838 // with that one. 1839 CallInst *Call = cast<CallInst>(Inst); 1840 CallInst *EarlierCall = cast<CallInst>(EarlierInst); 1841 Value *Arg = Call->getArgOperand(0); 1842 Value *EarlierArg = EarlierCall->getArgOperand(0); 1843 switch (PA.getAA()->alias(Arg, EarlierArg)) { 1844 case MustAlias: 1845 Changed = true; 1846 // If the load has a builtin retain, insert a plain retain for it. 1847 if (Class == ARCInstKind::LoadWeakRetained) { 1848 Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain); 1849 CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call); 1850 CI->setTailCall(); 1851 } 1852 // Zap the fully redundant load. 1853 Call->replaceAllUsesWith(EarlierCall); 1854 Call->eraseFromParent(); 1855 goto clobbered; 1856 case MayAlias: 1857 case PartialAlias: 1858 goto clobbered; 1859 case NoAlias: 1860 break; 1861 } 1862 break; 1863 } 1864 case ARCInstKind::StoreWeak: 1865 case ARCInstKind::InitWeak: { 1866 // If this is storing to the same pointer and has the same size etc. 1867 // replace this load's value with the stored value. 1868 CallInst *Call = cast<CallInst>(Inst); 1869 CallInst *EarlierCall = cast<CallInst>(EarlierInst); 1870 Value *Arg = Call->getArgOperand(0); 1871 Value *EarlierArg = EarlierCall->getArgOperand(0); 1872 switch (PA.getAA()->alias(Arg, EarlierArg)) { 1873 case MustAlias: 1874 Changed = true; 1875 // If the load has a builtin retain, insert a plain retain for it. 1876 if (Class == ARCInstKind::LoadWeakRetained) { 1877 Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain); 1878 CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call); 1879 CI->setTailCall(); 1880 } 1881 // Zap the fully redundant load. 1882 Call->replaceAllUsesWith(EarlierCall->getArgOperand(1)); 1883 Call->eraseFromParent(); 1884 goto clobbered; 1885 case MayAlias: 1886 case PartialAlias: 1887 goto clobbered; 1888 case NoAlias: 1889 break; 1890 } 1891 break; 1892 } 1893 case ARCInstKind::MoveWeak: 1894 case ARCInstKind::CopyWeak: 1895 // TOOD: Grab the copied value. 1896 goto clobbered; 1897 case ARCInstKind::AutoreleasepoolPush: 1898 case ARCInstKind::None: 1899 case ARCInstKind::IntrinsicUser: 1900 case ARCInstKind::User: 1901 // Weak pointers are only modified through the weak entry points 1902 // (and arbitrary calls, which could call the weak entry points). 1903 break; 1904 default: 1905 // Anything else could modify the weak pointer. 1906 goto clobbered; 1907 } 1908 } 1909 clobbered:; 1910 } 1911 1912 // Then, for each destroyWeak with an alloca operand, check to see if 1913 // the alloca and all its users can be zapped. 1914 for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { 1915 Instruction *Inst = &*I++; 1916 ARCInstKind Class = GetBasicARCInstKind(Inst); 1917 if (Class != ARCInstKind::DestroyWeak) 1918 continue; 1919 1920 CallInst *Call = cast<CallInst>(Inst); 1921 Value *Arg = Call->getArgOperand(0); 1922 if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) { 1923 for (User *U : Alloca->users()) { 1924 const Instruction *UserInst = cast<Instruction>(U); 1925 switch (GetBasicARCInstKind(UserInst)) { 1926 case ARCInstKind::InitWeak: 1927 case ARCInstKind::StoreWeak: 1928 case ARCInstKind::DestroyWeak: 1929 continue; 1930 default: 1931 goto done; 1932 } 1933 } 1934 Changed = true; 1935 for (auto UI = Alloca->user_begin(), UE = Alloca->user_end(); UI != UE;) { 1936 CallInst *UserInst = cast<CallInst>(*UI++); 1937 switch (GetBasicARCInstKind(UserInst)) { 1938 case ARCInstKind::InitWeak: 1939 case ARCInstKind::StoreWeak: 1940 // These functions return their second argument. 1941 UserInst->replaceAllUsesWith(UserInst->getArgOperand(1)); 1942 break; 1943 case ARCInstKind::DestroyWeak: 1944 // No return value. 1945 break; 1946 default: 1947 llvm_unreachable("alloca really is used!"); 1948 } 1949 UserInst->eraseFromParent(); 1950 } 1951 Alloca->eraseFromParent(); 1952 done:; 1953 } 1954 } 1955 } 1956 1957 /// Identify program paths which execute sequences of retains and releases which 1958 /// can be eliminated. 1959 bool ObjCARCOpt::OptimizeSequences(Function &F) { 1960 // Releases, Retains - These are used to store the results of the main flow 1961 // analysis. These use Value* as the key instead of Instruction* so that the 1962 // map stays valid when we get around to rewriting code and calls get 1963 // replaced by arguments. 1964 DenseMap<Value *, RRInfo> Releases; 1965 BlotMapVector<Value *, RRInfo> Retains; 1966 1967 // This is used during the traversal of the function to track the 1968 // states for each identified object at each block. 1969 DenseMap<const BasicBlock *, BBState> BBStates; 1970 1971 // Analyze the CFG of the function, and all instructions. 1972 bool NestingDetected = Visit(F, BBStates, Retains, Releases); 1973 1974 // Transform. 1975 bool AnyPairsCompletelyEliminated = PerformCodePlacement(BBStates, Retains, 1976 Releases, 1977 F.getParent()); 1978 1979 // Cleanup. 1980 MultiOwnersSet.clear(); 1981 1982 return AnyPairsCompletelyEliminated && NestingDetected; 1983 } 1984 1985 /// Check if there is a dependent call earlier that does not have anything in 1986 /// between the Retain and the call that can affect the reference count of their 1987 /// shared pointer argument. Note that Retain need not be in BB. 1988 static bool 1989 HasSafePathToPredecessorCall(const Value *Arg, Instruction *Retain, 1990 SmallPtrSetImpl<Instruction *> &DepInsts, 1991 SmallPtrSetImpl<const BasicBlock *> &Visited, 1992 ProvenanceAnalysis &PA) { 1993 FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain, 1994 DepInsts, Visited, PA); 1995 if (DepInsts.size() != 1) 1996 return false; 1997 1998 auto *Call = dyn_cast_or_null<CallInst>(*DepInsts.begin()); 1999 2000 // Check that the pointer is the return value of the call. 2001 if (!Call || Arg != Call) 2002 return false; 2003 2004 // Check that the call is a regular call. 2005 ARCInstKind Class = GetBasicARCInstKind(Call); 2006 return Class == ARCInstKind::CallOrUser || Class == ARCInstKind::Call; 2007 } 2008 2009 /// Find a dependent retain that precedes the given autorelease for which there 2010 /// is nothing in between the two instructions that can affect the ref count of 2011 /// Arg. 2012 static CallInst * 2013 FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB, 2014 Instruction *Autorelease, 2015 SmallPtrSetImpl<Instruction *> &DepInsts, 2016 SmallPtrSetImpl<const BasicBlock *> &Visited, 2017 ProvenanceAnalysis &PA) { 2018 FindDependencies(CanChangeRetainCount, Arg, 2019 BB, Autorelease, DepInsts, Visited, PA); 2020 if (DepInsts.size() != 1) 2021 return nullptr; 2022 2023 auto *Retain = dyn_cast_or_null<CallInst>(*DepInsts.begin()); 2024 2025 // Check that we found a retain with the same argument. 2026 if (!Retain || !IsRetain(GetBasicARCInstKind(Retain)) || 2027 GetArgRCIdentityRoot(Retain) != Arg) { 2028 return nullptr; 2029 } 2030 2031 return Retain; 2032 } 2033 2034 /// Look for an ``autorelease'' instruction dependent on Arg such that there are 2035 /// no instructions dependent on Arg that need a positive ref count in between 2036 /// the autorelease and the ret. 2037 static CallInst * 2038 FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB, 2039 ReturnInst *Ret, 2040 SmallPtrSetImpl<Instruction *> &DepInsts, 2041 SmallPtrSetImpl<const BasicBlock *> &V, 2042 ProvenanceAnalysis &PA) { 2043 FindDependencies(NeedsPositiveRetainCount, Arg, 2044 BB, Ret, DepInsts, V, PA); 2045 if (DepInsts.size() != 1) 2046 return nullptr; 2047 2048 auto *Autorelease = dyn_cast_or_null<CallInst>(*DepInsts.begin()); 2049 if (!Autorelease) 2050 return nullptr; 2051 ARCInstKind AutoreleaseClass = GetBasicARCInstKind(Autorelease); 2052 if (!IsAutorelease(AutoreleaseClass)) 2053 return nullptr; 2054 if (GetArgRCIdentityRoot(Autorelease) != Arg) 2055 return nullptr; 2056 2057 return Autorelease; 2058 } 2059 2060 /// Look for this pattern: 2061 /// \code 2062 /// %call = call i8* @something(...) 2063 /// %2 = call i8* @objc_retain(i8* %call) 2064 /// %3 = call i8* @objc_autorelease(i8* %2) 2065 /// ret i8* %3 2066 /// \endcode 2067 /// And delete the retain and autorelease. 2068 void ObjCARCOpt::OptimizeReturns(Function &F) { 2069 if (!F.getReturnType()->isPointerTy()) 2070 return; 2071 2072 DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeReturns ==\n"); 2073 2074 SmallPtrSet<Instruction *, 4> DependingInstructions; 2075 SmallPtrSet<const BasicBlock *, 4> Visited; 2076 for (BasicBlock &BB: F) { 2077 ReturnInst *Ret = dyn_cast<ReturnInst>(&BB.back()); 2078 2079 DEBUG(dbgs() << "Visiting: " << *Ret << "\n"); 2080 2081 if (!Ret) 2082 continue; 2083 2084 const Value *Arg = GetRCIdentityRoot(Ret->getOperand(0)); 2085 2086 // Look for an ``autorelease'' instruction that is a predecessor of Ret and 2087 // dependent on Arg such that there are no instructions dependent on Arg 2088 // that need a positive ref count in between the autorelease and Ret. 2089 CallInst *Autorelease = FindPredecessorAutoreleaseWithSafePath( 2090 Arg, &BB, Ret, DependingInstructions, Visited, PA); 2091 DependingInstructions.clear(); 2092 Visited.clear(); 2093 2094 if (!Autorelease) 2095 continue; 2096 2097 CallInst *Retain = FindPredecessorRetainWithSafePath( 2098 Arg, &BB, Autorelease, DependingInstructions, Visited, PA); 2099 DependingInstructions.clear(); 2100 Visited.clear(); 2101 2102 if (!Retain) 2103 continue; 2104 2105 // Check that there is nothing that can affect the reference count 2106 // between the retain and the call. Note that Retain need not be in BB. 2107 bool HasSafePathToCall = HasSafePathToPredecessorCall(Arg, Retain, 2108 DependingInstructions, 2109 Visited, PA); 2110 DependingInstructions.clear(); 2111 Visited.clear(); 2112 2113 if (!HasSafePathToCall) 2114 continue; 2115 2116 // If so, we can zap the retain and autorelease. 2117 Changed = true; 2118 ++NumRets; 2119 DEBUG(dbgs() << "Erasing: " << *Retain << "\nErasing: " 2120 << *Autorelease << "\n"); 2121 EraseInstruction(Retain); 2122 EraseInstruction(Autorelease); 2123 } 2124 } 2125 2126 #ifndef NDEBUG 2127 void 2128 ObjCARCOpt::GatherStatistics(Function &F, bool AfterOptimization) { 2129 llvm::Statistic &NumRetains = 2130 AfterOptimization? NumRetainsAfterOpt : NumRetainsBeforeOpt; 2131 llvm::Statistic &NumReleases = 2132 AfterOptimization? NumReleasesAfterOpt : NumReleasesBeforeOpt; 2133 2134 for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { 2135 Instruction *Inst = &*I++; 2136 switch (GetBasicARCInstKind(Inst)) { 2137 default: 2138 break; 2139 case ARCInstKind::Retain: 2140 ++NumRetains; 2141 break; 2142 case ARCInstKind::Release: 2143 ++NumReleases; 2144 break; 2145 } 2146 } 2147 } 2148 #endif 2149 2150 bool ObjCARCOpt::doInitialization(Module &M) { 2151 if (!EnableARCOpts) 2152 return false; 2153 2154 // If nothing in the Module uses ARC, don't do anything. 2155 Run = ModuleHasARC(M); 2156 if (!Run) 2157 return false; 2158 2159 // Intuitively, objc_retain and others are nocapture, however in practice 2160 // they are not, because they return their argument value. And objc_release 2161 // calls finalizers which can have arbitrary side effects. 2162 MDKindCache.init(&M); 2163 2164 // Initialize our runtime entry point cache. 2165 EP.init(&M); 2166 2167 return false; 2168 } 2169 2170 bool ObjCARCOpt::runOnFunction(Function &F) { 2171 if (!EnableARCOpts) 2172 return false; 2173 2174 // If nothing in the Module uses ARC, don't do anything. 2175 if (!Run) 2176 return false; 2177 2178 Changed = false; 2179 2180 DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName() << " >>>" 2181 "\n"); 2182 2183 PA.setAA(&getAnalysis<AAResultsWrapperPass>().getAAResults()); 2184 2185 #ifndef NDEBUG 2186 if (AreStatisticsEnabled()) { 2187 GatherStatistics(F, false); 2188 } 2189 #endif 2190 2191 // This pass performs several distinct transformations. As a compile-time aid 2192 // when compiling code that isn't ObjC, skip these if the relevant ObjC 2193 // library functions aren't declared. 2194 2195 // Preliminary optimizations. This also computes UsedInThisFunction. 2196 OptimizeIndividualCalls(F); 2197 2198 // Optimizations for weak pointers. 2199 if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::LoadWeak)) | 2200 (1 << unsigned(ARCInstKind::LoadWeakRetained)) | 2201 (1 << unsigned(ARCInstKind::StoreWeak)) | 2202 (1 << unsigned(ARCInstKind::InitWeak)) | 2203 (1 << unsigned(ARCInstKind::CopyWeak)) | 2204 (1 << unsigned(ARCInstKind::MoveWeak)) | 2205 (1 << unsigned(ARCInstKind::DestroyWeak)))) 2206 OptimizeWeakCalls(F); 2207 2208 // Optimizations for retain+release pairs. 2209 if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Retain)) | 2210 (1 << unsigned(ARCInstKind::RetainRV)) | 2211 (1 << unsigned(ARCInstKind::RetainBlock)))) 2212 if (UsedInThisFunction & (1 << unsigned(ARCInstKind::Release))) 2213 // Run OptimizeSequences until it either stops making changes or 2214 // no retain+release pair nesting is detected. 2215 while (OptimizeSequences(F)) {} 2216 2217 // Optimizations if objc_autorelease is used. 2218 if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Autorelease)) | 2219 (1 << unsigned(ARCInstKind::AutoreleaseRV)))) 2220 OptimizeReturns(F); 2221 2222 // Gather statistics after optimization. 2223 #ifndef NDEBUG 2224 if (AreStatisticsEnabled()) { 2225 GatherStatistics(F, true); 2226 } 2227 #endif 2228 2229 DEBUG(dbgs() << "\n"); 2230 2231 return Changed; 2232 } 2233 2234 void ObjCARCOpt::releaseMemory() { 2235 PA.clear(); 2236 } 2237 2238 /// @} 2239 /// 2240