1 //===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a shrink wrapping variant of prolog/epilog insertion: 11 // - Spills and restores of callee-saved registers (CSRs) are placed in the 12 // machine CFG to tightly surround their uses so that execution paths that 13 // do not use CSRs do not pay the spill/restore penalty. 14 // 15 // - Avoiding placment of spills/restores in loops: if a CSR is used inside a 16 // loop the spills are placed in the loop preheader, and restores are 17 // placed in the loop exit nodes (the successors of loop _exiting_ nodes). 18 // 19 // - Covering paths without CSR uses: 20 // If a region in a CFG uses CSRs and has multiple entry and/or exit points, 21 // the use info for the CSRs inside the region is propagated outward in the 22 // CFG to ensure validity of the spill/restore placements. This decreases 23 // the effectiveness of shrink wrapping but does not require edge splitting 24 // in the machine CFG. 25 // 26 // This shrink wrapping implementation uses an iterative analysis to determine 27 // which basic blocks require spills and restores for CSRs. 28 // 29 // This pass uses MachineDominators and MachineLoopInfo. Loop information 30 // is used to prevent placement of callee-saved register spills/restores 31 // in the bodies of loops. 32 // 33 //===----------------------------------------------------------------------===// 34 35 #define DEBUG_TYPE "shrink-wrap" 36 37 #include "PrologEpilogInserter.h" 38 #include "llvm/ADT/DenseMap.h" 39 #include "llvm/ADT/PostOrderIterator.h" 40 #include "llvm/ADT/STLExtras.h" 41 #include "llvm/ADT/SparseBitVector.h" 42 #include "llvm/ADT/Statistic.h" 43 #include "llvm/CodeGen/MachineDominators.h" 44 #include "llvm/CodeGen/MachineFrameInfo.h" 45 #include "llvm/CodeGen/MachineInstr.h" 46 #include "llvm/CodeGen/MachineLoopInfo.h" 47 #include "llvm/CodeGen/MachineRegisterInfo.h" 48 #include "llvm/Support/CommandLine.h" 49 #include "llvm/Support/Compiler.h" 50 #include "llvm/Support/Debug.h" 51 #include "llvm/Target/TargetMachine.h" 52 #include "llvm/Target/TargetRegisterInfo.h" 53 #include <sstream> 54 55 using namespace llvm; 56 57 STATISTIC(numSRReduced, "Number of CSR spills+restores reduced."); 58 59 // Shrink Wrapping: 60 static cl::opt<bool> 61 ShrinkWrapping("shrink-wrap", 62 cl::desc("Shrink wrap callee-saved register spills/restores")); 63 64 // Shrink wrap only the specified function, a debugging aid. 65 static cl::opt<std::string> 66 ShrinkWrapFunc("shrink-wrap-func", cl::Hidden, 67 cl::desc("Shrink wrap the specified function"), 68 cl::value_desc("funcname"), 69 cl::init("")); 70 71 // Debugging level for shrink wrapping. 72 enum ShrinkWrapDebugLevel { 73 None, BasicInfo, Iterations, Details 74 }; 75 76 static cl::opt<enum ShrinkWrapDebugLevel> 77 ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden, 78 cl::desc("Print shrink wrapping debugging information"), 79 cl::values( 80 clEnumVal(None , "disable debug output"), 81 clEnumVal(BasicInfo , "print basic DF sets"), 82 clEnumVal(Iterations, "print SR sets for each iteration"), 83 clEnumVal(Details , "print all DF sets"), 84 clEnumValEnd)); 85 86 87 void PEI::getAnalysisUsage(AnalysisUsage &AU) const { 88 AU.setPreservesCFG(); 89 if (ShrinkWrapping || ShrinkWrapFunc != "") { 90 AU.addRequired<MachineLoopInfo>(); 91 AU.addRequired<MachineDominatorTree>(); 92 } 93 AU.addPreserved<MachineLoopInfo>(); 94 AU.addPreserved<MachineDominatorTree>(); 95 AU.addRequired<TargetPassConfig>(); 96 MachineFunctionPass::getAnalysisUsage(AU); 97 } 98 99 //===----------------------------------------------------------------------===// 100 // ShrinkWrapping implementation 101 //===----------------------------------------------------------------------===// 102 103 // Convienences for dealing with machine loops. 104 MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) { 105 assert(LP && "Machine loop is NULL."); 106 MachineBasicBlock* PHDR = LP->getLoopPreheader(); 107 MachineLoop* PLP = LP->getParentLoop(); 108 while (PLP) { 109 PHDR = PLP->getLoopPreheader(); 110 PLP = PLP->getParentLoop(); 111 } 112 return PHDR; 113 } 114 115 MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) { 116 if (LP == 0) 117 return 0; 118 MachineLoop* PLP = LP->getParentLoop(); 119 while (PLP) { 120 LP = PLP; 121 PLP = PLP->getParentLoop(); 122 } 123 return LP; 124 } 125 126 bool PEI::isReturnBlock(MachineBasicBlock* MBB) { 127 return (MBB && !MBB->empty() && MBB->back().isReturn()); 128 } 129 130 // Initialize shrink wrapping DFA sets, called before iterations. 131 void PEI::clearAnticAvailSets() { 132 AnticIn.clear(); 133 AnticOut.clear(); 134 AvailIn.clear(); 135 AvailOut.clear(); 136 } 137 138 // Clear all sets constructed by shrink wrapping. 139 void PEI::clearAllSets() { 140 ReturnBlocks.clear(); 141 clearAnticAvailSets(); 142 UsedCSRegs.clear(); 143 CSRUsed.clear(); 144 TLLoops.clear(); 145 CSRSave.clear(); 146 CSRRestore.clear(); 147 } 148 149 // Initialize all shrink wrapping data. 150 void PEI::initShrinkWrappingInfo() { 151 clearAllSets(); 152 EntryBlock = 0; 153 #ifndef NDEBUG 154 HasFastExitPath = false; 155 #endif 156 ShrinkWrapThisFunction = ShrinkWrapping; 157 // DEBUG: enable or disable shrink wrapping for the current function 158 // via --shrink-wrap-func=<funcname>. 159 #ifndef NDEBUG 160 if (ShrinkWrapFunc != "") { 161 std::string MFName = MF->getName().str(); 162 ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); 163 } 164 #endif 165 } 166 167 168 /// placeCSRSpillsAndRestores - determine which MBBs of the function 169 /// need save, restore code for callee-saved registers by doing a DF analysis 170 /// similar to the one used in code motion (GVNPRE). This produces maps of MBBs 171 /// to sets of registers (CSRs) for saves and restores. MachineLoopInfo 172 /// is used to ensure that CSR save/restore code is not placed inside loops. 173 /// This function computes the maps of MBBs -> CSRs to spill and restore 174 /// in CSRSave, CSRRestore. 175 /// 176 /// If shrink wrapping is not being performed, place all spills in 177 /// the entry block, all restores in return blocks. In this case, 178 /// CSRSave has a single mapping, CSRRestore has mappings for each 179 /// return block. 180 /// 181 void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { 182 183 DEBUG(MF = &Fn); 184 185 initShrinkWrappingInfo(); 186 187 DEBUG(if (ShrinkWrapThisFunction) { 188 dbgs() << "Place CSR spills/restores for " 189 << MF->getName() << "\n"; 190 }); 191 192 if (calculateSets(Fn)) 193 placeSpillsAndRestores(Fn); 194 } 195 196 /// calcAnticInOut - calculate the anticipated in/out reg sets 197 /// for the given MBB by looking forward in the MCFG at MBB's 198 /// successors. 199 /// 200 bool PEI::calcAnticInOut(MachineBasicBlock* MBB) { 201 bool changed = false; 202 203 // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB)) 204 SmallVector<MachineBasicBlock*, 4> successors; 205 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 206 SE = MBB->succ_end(); SI != SE; ++SI) { 207 MachineBasicBlock* SUCC = *SI; 208 if (SUCC != MBB) 209 successors.push_back(SUCC); 210 } 211 212 unsigned i = 0, e = successors.size(); 213 if (i != e) { 214 CSRegSet prevAnticOut = AnticOut[MBB]; 215 MachineBasicBlock* SUCC = successors[i]; 216 217 AnticOut[MBB] = AnticIn[SUCC]; 218 for (++i; i != e; ++i) { 219 SUCC = successors[i]; 220 AnticOut[MBB] &= AnticIn[SUCC]; 221 } 222 if (prevAnticOut != AnticOut[MBB]) 223 changed = true; 224 } 225 226 // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]); 227 CSRegSet prevAnticIn = AnticIn[MBB]; 228 AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB]; 229 if (prevAnticIn != AnticIn[MBB]) 230 changed = true; 231 return changed; 232 } 233 234 /// calcAvailInOut - calculate the available in/out reg sets 235 /// for the given MBB by looking backward in the MCFG at MBB's 236 /// predecessors. 237 /// 238 bool PEI::calcAvailInOut(MachineBasicBlock* MBB) { 239 bool changed = false; 240 241 // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB)) 242 SmallVector<MachineBasicBlock*, 4> predecessors; 243 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), 244 PE = MBB->pred_end(); PI != PE; ++PI) { 245 MachineBasicBlock* PRED = *PI; 246 if (PRED != MBB) 247 predecessors.push_back(PRED); 248 } 249 250 unsigned i = 0, e = predecessors.size(); 251 if (i != e) { 252 CSRegSet prevAvailIn = AvailIn[MBB]; 253 MachineBasicBlock* PRED = predecessors[i]; 254 255 AvailIn[MBB] = AvailOut[PRED]; 256 for (++i; i != e; ++i) { 257 PRED = predecessors[i]; 258 AvailIn[MBB] &= AvailOut[PRED]; 259 } 260 if (prevAvailIn != AvailIn[MBB]) 261 changed = true; 262 } 263 264 // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]); 265 CSRegSet prevAvailOut = AvailOut[MBB]; 266 AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB]; 267 if (prevAvailOut != AvailOut[MBB]) 268 changed = true; 269 return changed; 270 } 271 272 /// calculateAnticAvail - build the sets anticipated and available 273 /// registers in the MCFG of the current function iteratively, 274 /// doing a combined forward and backward analysis. 275 /// 276 void PEI::calculateAnticAvail(MachineFunction &Fn) { 277 // Initialize data flow sets. 278 clearAnticAvailSets(); 279 280 // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. 281 bool changed = true; 282 unsigned iterations = 0; 283 while (changed) { 284 changed = false; 285 ++iterations; 286 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); 287 MBBI != MBBE; ++MBBI) { 288 MachineBasicBlock* MBB = MBBI; 289 290 // Calculate anticipated in, out regs at MBB from 291 // anticipated at successors of MBB. 292 changed |= calcAnticInOut(MBB); 293 294 // Calculate available in, out regs at MBB from 295 // available at predecessors of MBB. 296 changed |= calcAvailInOut(MBB); 297 } 298 } 299 300 DEBUG({ 301 if (ShrinkWrapDebugging >= Details) { 302 dbgs() 303 << "-----------------------------------------------------------\n" 304 << " Antic/Avail Sets:\n" 305 << "-----------------------------------------------------------\n" 306 << "iterations = " << iterations << "\n" 307 << "-----------------------------------------------------------\n" 308 << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n" 309 << "-----------------------------------------------------------\n"; 310 311 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); 312 MBBI != MBBE; ++MBBI) { 313 MachineBasicBlock* MBB = MBBI; 314 dumpSets(MBB); 315 } 316 317 dbgs() 318 << "-----------------------------------------------------------\n"; 319 } 320 }); 321 } 322 323 /// propagateUsesAroundLoop - copy used register info from MBB to all blocks 324 /// of the loop given by LP and its parent loops. This prevents spills/restores 325 /// from being placed in the bodies of loops. 326 /// 327 void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) { 328 if (! MBB || !LP) 329 return; 330 331 std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks(); 332 for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) { 333 MachineBasicBlock* LBB = loopBlocks[i]; 334 if (LBB == MBB) 335 continue; 336 if (CSRUsed[LBB].contains(CSRUsed[MBB])) 337 continue; 338 CSRUsed[LBB] |= CSRUsed[MBB]; 339 } 340 } 341 342 /// calculateSets - collect the CSRs used in this function, compute 343 /// the DF sets that describe the initial minimal regions in the 344 /// Machine CFG around which CSR spills and restores must be placed. 345 /// 346 /// Additionally, this function decides if shrink wrapping should 347 /// be disabled for the current function, checking the following: 348 /// 1. the current function has more than 500 MBBs: heuristic limit 349 /// on function size to reduce compile time impact of the current 350 /// iterative algorithm. 351 /// 2. all CSRs are used in the entry block. 352 /// 3. all CSRs are used in all immediate successors of the entry block. 353 /// 4. all CSRs are used in a subset of blocks, each of which dominates 354 /// all return blocks. These blocks, taken as a subgraph of the MCFG, 355 /// are equivalent to the entry block since all execution paths pass 356 /// through them. 357 /// 358 bool PEI::calculateSets(MachineFunction &Fn) { 359 // Sets used to compute spill, restore placement sets. 360 const std::vector<CalleeSavedInfo> CSI = 361 Fn.getFrameInfo()->getCalleeSavedInfo(); 362 363 // If no CSRs used, we are done. 364 if (CSI.empty()) { 365 DEBUG(if (ShrinkWrapThisFunction) 366 dbgs() << "DISABLED: " << Fn.getName() 367 << ": uses no callee-saved registers\n"); 368 return false; 369 } 370 371 // Save refs to entry and return blocks. 372 EntryBlock = Fn.begin(); 373 for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); 374 MBB != E; ++MBB) 375 if (isReturnBlock(MBB)) 376 ReturnBlocks.push_back(MBB); 377 378 // Determine if this function has fast exit paths. 379 DEBUG(if (ShrinkWrapThisFunction) 380 findFastExitPath()); 381 382 // Limit shrink wrapping via the current iterative bit vector 383 // implementation to functions with <= 500 MBBs. 384 if (Fn.size() > 500) { 385 DEBUG(if (ShrinkWrapThisFunction) 386 dbgs() << "DISABLED: " << Fn.getName() 387 << ": too large (" << Fn.size() << " MBBs)\n"); 388 ShrinkWrapThisFunction = false; 389 } 390 391 // Return now if not shrink wrapping. 392 if (! ShrinkWrapThisFunction) 393 return false; 394 395 // Collect set of used CSRs. 396 for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { 397 UsedCSRegs.set(inx); 398 } 399 400 // Walk instructions in all MBBs, create CSRUsed[] sets, choose 401 // whether or not to shrink wrap this function. 402 MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); 403 MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>(); 404 const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); 405 406 bool allCSRUsesInEntryBlock = true; 407 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); 408 MBBI != MBBE; ++MBBI) { 409 MachineBasicBlock* MBB = MBBI; 410 for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) { 411 for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { 412 unsigned Reg = CSI[inx].getReg(); 413 // If instruction I reads or modifies Reg, add it to UsedCSRegs, 414 // CSRUsed map for the current block. 415 for (unsigned opInx = 0, opEnd = I->getNumOperands(); 416 opInx != opEnd; ++opInx) { 417 const MachineOperand &MO = I->getOperand(opInx); 418 if (! (MO.isReg() && (MO.isUse() || MO.isDef()))) 419 continue; 420 unsigned MOReg = MO.getReg(); 421 if (!MOReg) 422 continue; 423 if (MOReg == Reg || 424 (TargetRegisterInfo::isPhysicalRegister(MOReg) && 425 TargetRegisterInfo::isPhysicalRegister(Reg) && 426 TRI->isSubRegister(Reg, MOReg))) { 427 // CSR Reg is defined/used in block MBB. 428 CSRUsed[MBB].set(inx); 429 // Check for uses in EntryBlock. 430 if (MBB != EntryBlock) 431 allCSRUsesInEntryBlock = false; 432 } 433 } 434 } 435 } 436 437 if (CSRUsed[MBB].empty()) 438 continue; 439 440 // Propagate CSRUsed[MBB] in loops 441 if (MachineLoop* LP = LI.getLoopFor(MBB)) { 442 // Add top level loop to work list. 443 MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP); 444 MachineLoop* PLP = getTopLevelLoopParent(LP); 445 446 if (! HDR) { 447 HDR = PLP->getHeader(); 448 assert(HDR->pred_size() > 0 && "Loop header has no predecessors?"); 449 MachineBasicBlock::pred_iterator PI = HDR->pred_begin(); 450 HDR = *PI; 451 } 452 TLLoops[HDR] = PLP; 453 454 // Push uses from inside loop to its parent loops, 455 // or to all other MBBs in its loop. 456 if (LP->getLoopDepth() > 1) { 457 for (MachineLoop* PLP = LP->getParentLoop(); PLP; 458 PLP = PLP->getParentLoop()) { 459 propagateUsesAroundLoop(MBB, PLP); 460 } 461 } else { 462 propagateUsesAroundLoop(MBB, LP); 463 } 464 } 465 } 466 467 if (allCSRUsesInEntryBlock) { 468 DEBUG(dbgs() << "DISABLED: " << Fn.getName() 469 << ": all CSRs used in EntryBlock\n"); 470 ShrinkWrapThisFunction = false; 471 } else { 472 bool allCSRsUsedInEntryFanout = true; 473 for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), 474 SE = EntryBlock->succ_end(); SI != SE; ++SI) { 475 MachineBasicBlock* SUCC = *SI; 476 if (CSRUsed[SUCC] != UsedCSRegs) 477 allCSRsUsedInEntryFanout = false; 478 } 479 if (allCSRsUsedInEntryFanout) { 480 DEBUG(dbgs() << "DISABLED: " << Fn.getName() 481 << ": all CSRs used in imm successors of EntryBlock\n"); 482 ShrinkWrapThisFunction = false; 483 } 484 } 485 486 if (ShrinkWrapThisFunction) { 487 // Check if MBB uses CSRs and dominates all exit nodes. 488 // Such nodes are equiv. to the entry node w.r.t. 489 // CSR uses: every path through the function must 490 // pass through this node. If each CSR is used at least 491 // once by these nodes, shrink wrapping is disabled. 492 CSRegSet CSRUsedInChokePoints; 493 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); 494 MBBI != MBBE; ++MBBI) { 495 MachineBasicBlock* MBB = MBBI; 496 if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1) 497 continue; 498 bool dominatesExitNodes = true; 499 for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) 500 if (! DT.dominates(MBB, ReturnBlocks[ri])) { 501 dominatesExitNodes = false; 502 break; 503 } 504 if (dominatesExitNodes) { 505 CSRUsedInChokePoints |= CSRUsed[MBB]; 506 if (CSRUsedInChokePoints == UsedCSRegs) { 507 DEBUG(dbgs() << "DISABLED: " << Fn.getName() 508 << ": all CSRs used in choke point(s) at " 509 << getBasicBlockName(MBB) << "\n"); 510 ShrinkWrapThisFunction = false; 511 break; 512 } 513 } 514 } 515 } 516 517 // Return now if we have decided not to apply shrink wrapping 518 // to the current function. 519 if (! ShrinkWrapThisFunction) 520 return false; 521 522 DEBUG({ 523 dbgs() << "ENABLED: " << Fn.getName(); 524 if (HasFastExitPath) 525 dbgs() << " (fast exit path)"; 526 dbgs() << "\n"; 527 if (ShrinkWrapDebugging >= BasicInfo) { 528 dbgs() << "------------------------------" 529 << "-----------------------------\n"; 530 dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; 531 if (ShrinkWrapDebugging >= Details) { 532 dbgs() << "------------------------------" 533 << "-----------------------------\n"; 534 dumpAllUsed(); 535 } 536 } 537 }); 538 539 // Build initial DF sets to determine minimal regions in the 540 // Machine CFG around which CSRs must be spilled and restored. 541 calculateAnticAvail(Fn); 542 543 return true; 544 } 545 546 /// addUsesForMEMERegion - add uses of CSRs spilled or restored in 547 /// multi-entry, multi-exit (MEME) regions so spill and restore 548 /// placement will not break code that enters or leaves a 549 /// shrink-wrapped region by inducing spills with no matching 550 /// restores or restores with no matching spills. A MEME region 551 /// is a subgraph of the MCFG with multiple entry edges, multiple 552 /// exit edges, or both. This code propagates use information 553 /// through the MCFG until all paths requiring spills and restores 554 /// _outside_ the computed minimal placement regions have been covered. 555 /// 556 bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, 557 SmallVector<MachineBasicBlock*, 4>& blks) { 558 if (MBB->succ_size() < 2 && MBB->pred_size() < 2) { 559 bool processThisBlock = false; 560 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 561 SE = MBB->succ_end(); SI != SE; ++SI) { 562 MachineBasicBlock* SUCC = *SI; 563 if (SUCC->pred_size() > 1) { 564 processThisBlock = true; 565 break; 566 } 567 } 568 if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) { 569 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), 570 PE = MBB->pred_end(); PI != PE; ++PI) { 571 MachineBasicBlock* PRED = *PI; 572 if (PRED->succ_size() > 1) { 573 processThisBlock = true; 574 break; 575 } 576 } 577 } 578 if (! processThisBlock) 579 return false; 580 } 581 582 CSRegSet prop; 583 if (!CSRSave[MBB].empty()) 584 prop = CSRSave[MBB]; 585 else if (!CSRRestore[MBB].empty()) 586 prop = CSRRestore[MBB]; 587 else 588 prop = CSRUsed[MBB]; 589 if (prop.empty()) 590 return false; 591 592 // Propagate selected bits to successors, predecessors of MBB. 593 bool addedUses = false; 594 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 595 SE = MBB->succ_end(); SI != SE; ++SI) { 596 MachineBasicBlock* SUCC = *SI; 597 // Self-loop 598 if (SUCC == MBB) 599 continue; 600 if (! CSRUsed[SUCC].contains(prop)) { 601 CSRUsed[SUCC] |= prop; 602 addedUses = true; 603 blks.push_back(SUCC); 604 DEBUG(if (ShrinkWrapDebugging >= Iterations) 605 dbgs() << getBasicBlockName(MBB) 606 << "(" << stringifyCSRegSet(prop) << ")->" 607 << "successor " << getBasicBlockName(SUCC) << "\n"); 608 } 609 } 610 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), 611 PE = MBB->pred_end(); PI != PE; ++PI) { 612 MachineBasicBlock* PRED = *PI; 613 // Self-loop 614 if (PRED == MBB) 615 continue; 616 if (! CSRUsed[PRED].contains(prop)) { 617 CSRUsed[PRED] |= prop; 618 addedUses = true; 619 blks.push_back(PRED); 620 DEBUG(if (ShrinkWrapDebugging >= Iterations) 621 dbgs() << getBasicBlockName(MBB) 622 << "(" << stringifyCSRegSet(prop) << ")->" 623 << "predecessor " << getBasicBlockName(PRED) << "\n"); 624 } 625 } 626 return addedUses; 627 } 628 629 /// addUsesForTopLevelLoops - add uses for CSRs used inside top 630 /// level loops to the exit blocks of those loops. 631 /// 632 bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) { 633 bool addedUses = false; 634 635 // Place restores for top level loops where needed. 636 for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator 637 I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) { 638 MachineBasicBlock* MBB = I->first; 639 MachineLoop* LP = I->second; 640 MachineBasicBlock* HDR = LP->getHeader(); 641 SmallVector<MachineBasicBlock*, 4> exitBlocks; 642 CSRegSet loopSpills; 643 644 loopSpills = CSRSave[MBB]; 645 if (CSRSave[MBB].empty()) { 646 loopSpills = CSRUsed[HDR]; 647 assert(!loopSpills.empty() && "No CSRs used in loop?"); 648 } else if (CSRRestore[MBB].contains(CSRSave[MBB])) 649 continue; 650 651 LP->getExitBlocks(exitBlocks); 652 assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?"); 653 for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) { 654 MachineBasicBlock* EXB = exitBlocks[i]; 655 if (! CSRUsed[EXB].contains(loopSpills)) { 656 CSRUsed[EXB] |= loopSpills; 657 addedUses = true; 658 DEBUG(if (ShrinkWrapDebugging >= Iterations) 659 dbgs() << "LOOP " << getBasicBlockName(MBB) 660 << "(" << stringifyCSRegSet(loopSpills) << ")->" 661 << getBasicBlockName(EXB) << "\n"); 662 if (EXB->succ_size() > 1 || EXB->pred_size() > 1) 663 blks.push_back(EXB); 664 } 665 } 666 } 667 return addedUses; 668 } 669 670 /// calcSpillPlacements - determine which CSRs should be spilled 671 /// in MBB using AnticIn sets of MBB's predecessors, keeping track 672 /// of changes to spilled reg sets. Add MBB to the set of blocks 673 /// that need to be processed for propagating use info to cover 674 /// multi-entry/exit regions. 675 /// 676 bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, 677 SmallVector<MachineBasicBlock*, 4> &blks, 678 CSRegBlockMap &prevSpills) { 679 bool placedSpills = false; 680 // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB) 681 CSRegSet anticInPreds; 682 SmallVector<MachineBasicBlock*, 4> predecessors; 683 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), 684 PE = MBB->pred_end(); PI != PE; ++PI) { 685 MachineBasicBlock* PRED = *PI; 686 if (PRED != MBB) 687 predecessors.push_back(PRED); 688 } 689 unsigned i = 0, e = predecessors.size(); 690 if (i != e) { 691 MachineBasicBlock* PRED = predecessors[i]; 692 anticInPreds = UsedCSRegs - AnticIn[PRED]; 693 for (++i; i != e; ++i) { 694 PRED = predecessors[i]; 695 anticInPreds &= (UsedCSRegs - AnticIn[PRED]); 696 } 697 } else { 698 // Handle uses in entry blocks (which have no predecessors). 699 // This is necessary because the DFA formulation assumes the 700 // entry and (multiple) exit nodes cannot have CSR uses, which 701 // is not the case in the real world. 702 anticInPreds = UsedCSRegs; 703 } 704 // Compute spills required at MBB: 705 CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds; 706 707 if (! CSRSave[MBB].empty()) { 708 if (MBB == EntryBlock) { 709 for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) 710 CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB]; 711 } else { 712 // Reset all regs spilled in MBB that are also spilled in EntryBlock. 713 if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) { 714 CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock]; 715 } 716 } 717 } 718 placedSpills = (CSRSave[MBB] != prevSpills[MBB]); 719 prevSpills[MBB] = CSRSave[MBB]; 720 // Remember this block for adding restores to successor 721 // blocks for multi-entry region. 722 if (placedSpills) 723 blks.push_back(MBB); 724 725 DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations) 726 dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " 727 << stringifyCSRegSet(CSRSave[MBB]) << "\n"); 728 729 return placedSpills; 730 } 731 732 /// calcRestorePlacements - determine which CSRs should be restored 733 /// in MBB using AvailOut sets of MBB's succcessors, keeping track 734 /// of changes to restored reg sets. Add MBB to the set of blocks 735 /// that need to be processed for propagating use info to cover 736 /// multi-entry/exit regions. 737 /// 738 bool PEI::calcRestorePlacements(MachineBasicBlock* MBB, 739 SmallVector<MachineBasicBlock*, 4> &blks, 740 CSRegBlockMap &prevRestores) { 741 bool placedRestores = false; 742 // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB) 743 CSRegSet availOutSucc; 744 SmallVector<MachineBasicBlock*, 4> successors; 745 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 746 SE = MBB->succ_end(); SI != SE; ++SI) { 747 MachineBasicBlock* SUCC = *SI; 748 if (SUCC != MBB) 749 successors.push_back(SUCC); 750 } 751 unsigned i = 0, e = successors.size(); 752 if (i != e) { 753 MachineBasicBlock* SUCC = successors[i]; 754 availOutSucc = UsedCSRegs - AvailOut[SUCC]; 755 for (++i; i != e; ++i) { 756 SUCC = successors[i]; 757 availOutSucc &= (UsedCSRegs - AvailOut[SUCC]); 758 } 759 } else { 760 if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) { 761 // Handle uses in return blocks (which have no successors). 762 // This is necessary because the DFA formulation assumes the 763 // entry and (multiple) exit nodes cannot have CSR uses, which 764 // is not the case in the real world. 765 availOutSucc = UsedCSRegs; 766 } 767 } 768 // Compute restores required at MBB: 769 CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc; 770 771 // Postprocess restore placements at MBB. 772 // Remove the CSRs that are restored in the return blocks. 773 // Lest this be confusing, note that: 774 // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks. 775 if (MBB->succ_size() && ! CSRRestore[MBB].empty()) { 776 if (! CSRSave[EntryBlock].empty()) 777 CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock]; 778 } 779 placedRestores = (CSRRestore[MBB] != prevRestores[MBB]); 780 prevRestores[MBB] = CSRRestore[MBB]; 781 // Remember this block for adding saves to predecessor 782 // blocks for multi-entry region. 783 if (placedRestores) 784 blks.push_back(MBB); 785 786 DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations) 787 dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = " 788 << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); 789 790 return placedRestores; 791 } 792 793 /// placeSpillsAndRestores - place spills and restores of CSRs 794 /// used in MBBs in minimal regions that contain the uses. 795 /// 796 void PEI::placeSpillsAndRestores(MachineFunction &Fn) { 797 CSRegBlockMap prevCSRSave; 798 CSRegBlockMap prevCSRRestore; 799 SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks; 800 bool changed = true; 801 unsigned iterations = 0; 802 803 // Iterate computation of spill and restore placements in the MCFG until: 804 // 1. CSR use info has been fully propagated around the MCFG, and 805 // 2. computation of CSRSave[], CSRRestore[] reach fixed points. 806 while (changed) { 807 changed = false; 808 ++iterations; 809 810 DEBUG(if (ShrinkWrapDebugging >= Iterations) 811 dbgs() << "iter " << iterations 812 << " --------------------------------------------------\n"); 813 814 // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG, 815 // which determines the placements of spills and restores. 816 // Keep track of changes to spills, restores in each iteration to 817 // minimize the total iterations. 818 bool SRChanged = false; 819 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); 820 MBBI != MBBE; ++MBBI) { 821 MachineBasicBlock* MBB = MBBI; 822 823 // Place spills for CSRs in MBB. 824 SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave); 825 826 // Place restores for CSRs in MBB. 827 SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore); 828 } 829 830 // Add uses of CSRs used inside loops where needed. 831 changed |= addUsesForTopLevelLoops(cvBlocks); 832 833 // Add uses for CSRs spilled or restored at branch, join points. 834 if (changed || SRChanged) { 835 while (! cvBlocks.empty()) { 836 MachineBasicBlock* MBB = cvBlocks.pop_back_val(); 837 changed |= addUsesForMEMERegion(MBB, ncvBlocks); 838 } 839 if (! ncvBlocks.empty()) { 840 cvBlocks = ncvBlocks; 841 ncvBlocks.clear(); 842 } 843 } 844 845 if (changed) { 846 calculateAnticAvail(Fn); 847 CSRSave.clear(); 848 CSRRestore.clear(); 849 } 850 } 851 852 // Check for effectiveness: 853 // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks} 854 // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock] 855 // Gives a measure of how many CSR spills have been moved from EntryBlock 856 // to minimal regions enclosing their uses. 857 CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]); 858 unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count(); 859 numSRReduced += numSRReducedThisFunc; 860 DEBUG(if (ShrinkWrapDebugging >= BasicInfo) { 861 dbgs() << "-----------------------------------------------------------\n"; 862 dbgs() << "total iterations = " << iterations << " ( " 863 << Fn.getName() 864 << " " << numSRReducedThisFunc 865 << " " << Fn.size() 866 << " )\n"; 867 dbgs() << "-----------------------------------------------------------\n"; 868 dumpSRSets(); 869 dbgs() << "-----------------------------------------------------------\n"; 870 if (numSRReducedThisFunc) 871 verifySpillRestorePlacement(); 872 }); 873 } 874 875 // Debugging methods. 876 #ifndef NDEBUG 877 /// findFastExitPath - debugging method used to detect functions 878 /// with at least one path from the entry block to a return block 879 /// directly or which has a very small number of edges. 880 /// 881 void PEI::findFastExitPath() { 882 if (! EntryBlock) 883 return; 884 // Fina a path from EntryBlock to any return block that does not branch: 885 // Entry 886 // | ... 887 // v | 888 // B1<-----+ 889 // | 890 // v 891 // Return 892 for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), 893 SE = EntryBlock->succ_end(); SI != SE; ++SI) { 894 MachineBasicBlock* SUCC = *SI; 895 896 // Assume positive, disprove existence of fast path. 897 HasFastExitPath = true; 898 899 // Check the immediate successors. 900 if (isReturnBlock(SUCC)) { 901 if (ShrinkWrapDebugging >= BasicInfo) 902 dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock) 903 << "->" << getBasicBlockName(SUCC) << "\n"; 904 break; 905 } 906 // Traverse df from SUCC, look for a branch block. 907 std::string exitPath = getBasicBlockName(SUCC); 908 for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC), 909 BE = df_end(SUCC); BI != BE; ++BI) { 910 MachineBasicBlock* SBB = *BI; 911 // Reject paths with branch nodes. 912 if (SBB->succ_size() > 1) { 913 HasFastExitPath = false; 914 break; 915 } 916 exitPath += "->" + getBasicBlockName(SBB); 917 } 918 if (HasFastExitPath) { 919 if (ShrinkWrapDebugging >= BasicInfo) 920 dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock) 921 << "->" << exitPath << "\n"; 922 break; 923 } 924 } 925 } 926 927 /// verifySpillRestorePlacement - check the current spill/restore 928 /// sets for safety. Attempt to find spills without restores or 929 /// restores without spills. 930 /// Spills: walk df from each MBB in spill set ensuring that 931 /// all CSRs spilled at MMBB are restored on all paths 932 /// from MBB to all exit blocks. 933 /// Restores: walk idf from each MBB in restore set ensuring that 934 /// all CSRs restored at MBB are spilled on all paths 935 /// reaching MBB. 936 /// 937 void PEI::verifySpillRestorePlacement() { 938 unsigned numReturnBlocks = 0; 939 for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); 940 MBBI != MBBE; ++MBBI) { 941 MachineBasicBlock* MBB = MBBI; 942 if (isReturnBlock(MBB) || MBB->succ_size() == 0) 943 ++numReturnBlocks; 944 } 945 for (CSRegBlockMap::iterator BI = CSRSave.begin(), 946 BE = CSRSave.end(); BI != BE; ++BI) { 947 MachineBasicBlock* MBB = BI->first; 948 CSRegSet spilled = BI->second; 949 CSRegSet restored; 950 951 if (spilled.empty()) 952 continue; 953 954 DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " 955 << stringifyCSRegSet(spilled) 956 << " RESTORE[" << getBasicBlockName(MBB) << "] = " 957 << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); 958 959 if (CSRRestore[MBB].intersects(spilled)) { 960 restored |= (CSRRestore[MBB] & spilled); 961 } 962 963 // Walk depth first from MBB to find restores of all CSRs spilled at MBB: 964 // we must find restores for all spills w/no intervening spills on all 965 // paths from MBB to all return blocks. 966 for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB), 967 BE = df_end(MBB); BI != BE; ++BI) { 968 MachineBasicBlock* SBB = *BI; 969 if (SBB == MBB) 970 continue; 971 // Stop when we encounter spills of any CSRs spilled at MBB that 972 // have not yet been seen to be restored. 973 if (CSRSave[SBB].intersects(spilled) && 974 !restored.contains(CSRSave[SBB] & spilled)) 975 break; 976 // Collect the CSRs spilled at MBB that are restored 977 // at this DF successor of MBB. 978 if (CSRRestore[SBB].intersects(spilled)) 979 restored |= (CSRRestore[SBB] & spilled); 980 // If we are at a retun block, check that the restores 981 // we have seen so far exhaust the spills at MBB, then 982 // reset the restores. 983 if (isReturnBlock(SBB) || SBB->succ_size() == 0) { 984 if (restored != spilled) { 985 CSRegSet notRestored = (spilled - restored); 986 DEBUG(dbgs() << MF->getName() << ": " 987 << stringifyCSRegSet(notRestored) 988 << " spilled at " << getBasicBlockName(MBB) 989 << " are never restored on path to return " 990 << getBasicBlockName(SBB) << "\n"); 991 } 992 restored.clear(); 993 } 994 } 995 } 996 997 // Check restore placements. 998 for (CSRegBlockMap::iterator BI = CSRRestore.begin(), 999 BE = CSRRestore.end(); BI != BE; ++BI) { 1000 MachineBasicBlock* MBB = BI->first; 1001 CSRegSet restored = BI->second; 1002 CSRegSet spilled; 1003 1004 if (restored.empty()) 1005 continue; 1006 1007 DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " 1008 << stringifyCSRegSet(CSRSave[MBB]) 1009 << " RESTORE[" << getBasicBlockName(MBB) << "] = " 1010 << stringifyCSRegSet(restored) << "\n"); 1011 1012 if (CSRSave[MBB].intersects(restored)) { 1013 spilled |= (CSRSave[MBB] & restored); 1014 } 1015 // Walk inverse depth first from MBB to find spills of all 1016 // CSRs restored at MBB: 1017 for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB), 1018 BE = idf_end(MBB); BI != BE; ++BI) { 1019 MachineBasicBlock* PBB = *BI; 1020 if (PBB == MBB) 1021 continue; 1022 // Stop when we encounter restores of any CSRs restored at MBB that 1023 // have not yet been seen to be spilled. 1024 if (CSRRestore[PBB].intersects(restored) && 1025 !spilled.contains(CSRRestore[PBB] & restored)) 1026 break; 1027 // Collect the CSRs restored at MBB that are spilled 1028 // at this DF predecessor of MBB. 1029 if (CSRSave[PBB].intersects(restored)) 1030 spilled |= (CSRSave[PBB] & restored); 1031 } 1032 if (spilled != restored) { 1033 CSRegSet notSpilled = (restored - spilled); 1034 DEBUG(dbgs() << MF->getName() << ": " 1035 << stringifyCSRegSet(notSpilled) 1036 << " restored at " << getBasicBlockName(MBB) 1037 << " are never spilled\n"); 1038 } 1039 } 1040 } 1041 1042 // Debugging print methods. 1043 std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { 1044 if (!MBB) 1045 return ""; 1046 1047 if (MBB->getBasicBlock()) 1048 return MBB->getBasicBlock()->getName().str(); 1049 1050 std::ostringstream name; 1051 name << "_MBB_" << MBB->getNumber(); 1052 return name.str(); 1053 } 1054 1055 std::string PEI::stringifyCSRegSet(const CSRegSet& s) { 1056 const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); 1057 const std::vector<CalleeSavedInfo> CSI = 1058 MF->getFrameInfo()->getCalleeSavedInfo(); 1059 1060 std::ostringstream srep; 1061 if (CSI.size() == 0) { 1062 srep << "[]"; 1063 return srep.str(); 1064 } 1065 srep << "["; 1066 CSRegSet::iterator I = s.begin(), E = s.end(); 1067 if (I != E) { 1068 unsigned reg = CSI[*I].getReg(); 1069 srep << TRI->getName(reg); 1070 for (++I; I != E; ++I) { 1071 reg = CSI[*I].getReg(); 1072 srep << ","; 1073 srep << TRI->getName(reg); 1074 } 1075 } 1076 srep << "]"; 1077 return srep.str(); 1078 } 1079 1080 void PEI::dumpSet(const CSRegSet& s) { 1081 DEBUG(dbgs() << stringifyCSRegSet(s) << "\n"); 1082 } 1083 1084 void PEI::dumpUsed(MachineBasicBlock* MBB) { 1085 DEBUG({ 1086 if (MBB) 1087 dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = " 1088 << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; 1089 }); 1090 } 1091 1092 void PEI::dumpAllUsed() { 1093 for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); 1094 MBBI != MBBE; ++MBBI) { 1095 MachineBasicBlock* MBB = MBBI; 1096 dumpUsed(MBB); 1097 } 1098 } 1099 1100 void PEI::dumpSets(MachineBasicBlock* MBB) { 1101 DEBUG({ 1102 if (MBB) 1103 dbgs() << getBasicBlockName(MBB) << " | " 1104 << stringifyCSRegSet(CSRUsed[MBB]) << " | " 1105 << stringifyCSRegSet(AnticIn[MBB]) << " | " 1106 << stringifyCSRegSet(AnticOut[MBB]) << " | " 1107 << stringifyCSRegSet(AvailIn[MBB]) << " | " 1108 << stringifyCSRegSet(AvailOut[MBB]) << "\n"; 1109 }); 1110 } 1111 1112 void PEI::dumpSets1(MachineBasicBlock* MBB) { 1113 DEBUG({ 1114 if (MBB) 1115 dbgs() << getBasicBlockName(MBB) << " | " 1116 << stringifyCSRegSet(CSRUsed[MBB]) << " | " 1117 << stringifyCSRegSet(AnticIn[MBB]) << " | " 1118 << stringifyCSRegSet(AnticOut[MBB]) << " | " 1119 << stringifyCSRegSet(AvailIn[MBB]) << " | " 1120 << stringifyCSRegSet(AvailOut[MBB]) << " | " 1121 << stringifyCSRegSet(CSRSave[MBB]) << " | " 1122 << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; 1123 }); 1124 } 1125 1126 void PEI::dumpAllSets() { 1127 for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); 1128 MBBI != MBBE; ++MBBI) { 1129 MachineBasicBlock* MBB = MBBI; 1130 dumpSets1(MBB); 1131 } 1132 } 1133 1134 void PEI::dumpSRSets() { 1135 DEBUG({ 1136 for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); 1137 MBB != E; ++MBB) { 1138 if (!CSRSave[MBB].empty()) { 1139 dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " 1140 << stringifyCSRegSet(CSRSave[MBB]); 1141 if (CSRRestore[MBB].empty()) 1142 dbgs() << '\n'; 1143 } 1144 1145 if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty()) 1146 dbgs() << " " 1147 << "RESTORE[" << getBasicBlockName(MBB) << "] = " 1148 << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; 1149 } 1150 }); 1151 } 1152 #endif 1153