1 //===- subzero/src/IceRegAlloc.cpp - Linear-scan implementation -----------===// 2 // 3 // The Subzero Code Generator 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief Implements the LinearScan class, which performs the linear-scan 12 /// register allocation after liveness analysis has been performed. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "IceRegAlloc.h" 17 18 #include "IceBitVector.h" 19 #include "IceCfg.h" 20 #include "IceCfgNode.h" 21 #include "IceInst.h" 22 #include "IceInstVarIter.h" 23 #include "IceOperand.h" 24 #include "IceTargetLowering.h" 25 26 #include "llvm/Support/Format.h" 27 28 namespace Ice { 29 30 namespace { 31 32 // Returns true if Var has any definitions within Item's live range. 33 // TODO(stichnot): Consider trimming the Definitions list similar to how the 34 // live ranges are trimmed, since all the overlapsDefs() tests are whether some 35 // variable's definitions overlap Cur, and trimming is with respect Cur.start. 36 // Initial tests show no measurable performance difference, so we'll keep the 37 // code simple for now. 
38 bool overlapsDefs(const Cfg *Func, const Variable *Item, const Variable *Var) { 39 constexpr bool UseTrimmed = true; 40 VariablesMetadata *VMetadata = Func->getVMetadata(); 41 if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var)) 42 if (Item->getLiveRange().overlapsInst(FirstDef->getNumber(), UseTrimmed)) 43 return true; 44 for (const Inst *Def : VMetadata->getLatterDefinitions(Var)) { 45 if (Item->getLiveRange().overlapsInst(Def->getNumber(), UseTrimmed)) 46 return true; 47 } 48 return false; 49 } 50 51 void dumpDisableOverlap(const Cfg *Func, const Variable *Var, 52 const char *Reason) { 53 if (!BuildDefs::dump()) 54 return; 55 if (!Func->isVerbose(IceV_LinearScan)) 56 return; 57 58 VariablesMetadata *VMetadata = Func->getVMetadata(); 59 Ostream &Str = Func->getContext()->getStrDump(); 60 Str << "Disabling Overlap due to " << Reason << " " << *Var 61 << " LIVE=" << Var->getLiveRange() << " Defs="; 62 if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var)) 63 Str << FirstDef->getNumber(); 64 const InstDefList &Defs = VMetadata->getLatterDefinitions(Var); 65 for (size_t i = 0; i < Defs.size(); ++i) { 66 Str << "," << Defs[i]->getNumber(); 67 } 68 Str << "\n"; 69 } 70 71 void dumpLiveRange(const Variable *Var, const Cfg *Func) { 72 if (!BuildDefs::dump()) 73 return; 74 Ostream &Str = Func->getContext()->getStrDump(); 75 Str << "R="; 76 if (Var->hasRegTmp()) { 77 Str << llvm::format("%2d", int(Var->getRegNumTmp())); 78 } else { 79 Str << "NA"; 80 } 81 Str << " V="; 82 Var->dump(Func); 83 Str << " Range=" << Var->getLiveRange(); 84 } 85 86 int32_t findMinWeightIndex( 87 const SmallBitVector &RegMask, 88 const llvm::SmallVector<RegWeight, LinearScan::REGS_SIZE> &Weights) { 89 int MinWeightIndex = -1; 90 for (RegNumT i : RegNumBVIter(RegMask)) { 91 if (MinWeightIndex < 0 || Weights[i] < Weights[MinWeightIndex]) 92 MinWeightIndex = i; 93 } 94 assert(getFlags().getRegAllocReserve() || MinWeightIndex >= 0); 95 return MinWeightIndex; 96 } 97 98 } // end 
// end of anonymous namespace

LinearScan::LinearScan(Cfg *Func)
    : Func(Func), Ctx(Func->getContext()), Target(Func->getTarget()),
      Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)),
      UseReserve(getFlags().getRegAllocReserve()) {}

// Prepare for full register allocation of all variables. We depend on
// liveness analysis to have calculated live ranges.
void LinearScan::initForGlobal() {
  TimerMarker T(TimerStack::TT_initUnhandled, Func);
  FindPreference = true;
  // For full register allocation, normally we want to enable FindOverlap
  // (meaning we look for opportunities for two overlapping live ranges to
  // safely share the same register). However, we disable it for phi-lowering
  // register allocation since no overlap opportunities should be available
  // and it's more expensive to look for opportunities.
  FindOverlap = (Kind != RAK_Phi);
  Unhandled.reserve(Vars.size());
  UnhandledPrecolored.reserve(Vars.size());
  // Gather the live ranges of all variables and add them to the Unhandled
  // set.
  for (Variable *Var : Vars) {
    // Don't consider rematerializable variables.
    if (Var->isRematerializable())
      continue;
    // Explicitly don't consider zero-weight variables, which are meant to be
    // spill slots.
    if (Var->mustNotHaveReg())
      continue;
    // Don't bother if the variable has a null live range, which means it was
    // never referenced.
    if (Var->getLiveRange().isEmpty())
      continue;
    Var->untrimLiveRange();
    Unhandled.push_back(Var);
    if (Var->hasReg()) {
      // Pre-colored variable: seed the temporary assignment with the fixed
      // register and mark it infinite-weight so it can't be evicted.
      Var->setRegNumTmp(Var->getRegNum());
      Var->setMustHaveReg();
      UnhandledPrecolored.push_back(Var);
    }
  }

  // Build the (ordered) list of FakeKill instruction numbers.
  Kills.clear();
  // Phi lowering should not be creating new call instructions, so there
  // should be no infinite-weight not-yet-colored live ranges that span a call
  // instruction, hence no need to construct the Kills list.
  if (Kind == RAK_Phi)
    return;
  for (CfgNode *Node : Func->getNodes()) {
    for (Inst &I : Node->getInsts()) {
      if (auto *Kill = llvm::dyn_cast<InstFakeKill>(&I)) {
        // Only record kills whose linked (call) instruction is still live.
        if (!Kill->isDeleted() && !Kill->getLinked()->isDeleted())
          Kills.push_back(I.getNumber());
      }
    }
  }
}

// Validate the integrity of the live ranges. If there are any errors, it
// prints details and returns false. On success, it returns true.
bool LinearScan::livenessValidateIntervals(
    const DefUseErrorList &DefsWithoutUses,
    const DefUseErrorList &UsesBeforeDefs,
    const CfgVector<InstNumberT> &LRBegin,
    const CfgVector<InstNumberT> &LREnd) const {
  if (DefsWithoutUses.empty() && UsesBeforeDefs.empty())
    return true;

  // Errors exist; without dump support there is nothing we can print.
  if (!BuildDefs::dump())
    return false;

  OstreamLocker L(Ctx);
  Ostream &Str = Ctx->getStrDump();
  for (SizeT VarNum : DefsWithoutUses) {
    Variable *Var = Vars[VarNum];
    Str << "LR def without use, instruction " << LRBegin[VarNum]
        << ", variable " << Var->getName() << "\n";
  }
  for (SizeT VarNum : UsesBeforeDefs) {
    Variable *Var = Vars[VarNum];
    Str << "LR use before def, instruction " << LREnd[VarNum] << ", variable "
        << Var->getName() << "\n";
  }
  return false;
}

// Prepare for very simple register allocation of only infinite-weight
// Variables while respecting pre-colored Variables. Some properties we take
// advantage of:
//
// * Live ranges of interest consist of a single segment.
//
// * Live ranges of interest never span a call instruction.
191 // 192 // * Phi instructions are not considered because either phis have already been 193 // lowered, or they don't contain any pre-colored or infinite-weight 194 // Variables. 195 // 196 // * We don't need to renumber instructions before computing live ranges because 197 // all the high-level ICE instructions are deleted prior to lowering, and the 198 // low-level instructions are added in monotonically increasing order. 199 // 200 // * There are no opportunities for register preference or allowing overlap. 201 // 202 // Some properties we aren't (yet) taking advantage of: 203 // 204 // * Because live ranges are a single segment, the Inactive set will always be 205 // empty, and the live range trimming operation is unnecessary. 206 // 207 // * Calculating overlap of single-segment live ranges could be optimized a bit. 208 void LinearScan::initForInfOnly() { 209 TimerMarker T(TimerStack::TT_initUnhandled, Func); 210 FindPreference = false; 211 FindOverlap = false; 212 SizeT NumVars = 0; 213 214 // Iterate across all instructions and record the begin and end of the live 215 // range for each variable that is pre-colored or infinite weight. 
216 CfgVector<InstNumberT> LRBegin(Vars.size(), Inst::NumberSentinel); 217 CfgVector<InstNumberT> LREnd(Vars.size(), Inst::NumberSentinel); 218 DefUseErrorList DefsWithoutUses, UsesBeforeDefs; 219 for (CfgNode *Node : Func->getNodes()) { 220 for (Inst &Instr : Node->getInsts()) { 221 if (Instr.isDeleted()) 222 continue; 223 FOREACH_VAR_IN_INST(Var, Instr) { 224 if (Var->getIgnoreLiveness()) 225 continue; 226 if (Var->hasReg() || Var->mustHaveReg()) { 227 SizeT VarNum = Var->getIndex(); 228 LREnd[VarNum] = Instr.getNumber(); 229 if (!Var->getIsArg() && LRBegin[VarNum] == Inst::NumberSentinel) 230 UsesBeforeDefs.push_back(VarNum); 231 } 232 } 233 if (const Variable *Var = Instr.getDest()) { 234 if (!Var->getIgnoreLiveness() && 235 (Var->hasReg() || Var->mustHaveReg())) { 236 if (LRBegin[Var->getIndex()] == Inst::NumberSentinel) { 237 LRBegin[Var->getIndex()] = Instr.getNumber(); 238 ++NumVars; 239 } 240 } 241 } 242 } 243 } 244 245 Unhandled.reserve(NumVars); 246 UnhandledPrecolored.reserve(NumVars); 247 for (SizeT i = 0; i < Vars.size(); ++i) { 248 Variable *Var = Vars[i]; 249 if (Var->isRematerializable()) 250 continue; 251 if (LRBegin[i] != Inst::NumberSentinel) { 252 if (LREnd[i] == Inst::NumberSentinel) { 253 DefsWithoutUses.push_back(i); 254 continue; 255 } 256 Unhandled.push_back(Var); 257 Var->resetLiveRange(); 258 Var->addLiveRange(LRBegin[i], LREnd[i]); 259 Var->untrimLiveRange(); 260 if (Var->hasReg()) { 261 Var->setRegNumTmp(Var->getRegNum()); 262 Var->setMustHaveReg(); 263 UnhandledPrecolored.push_back(Var); 264 } 265 --NumVars; 266 } 267 } 268 269 if (!livenessValidateIntervals(DefsWithoutUses, UsesBeforeDefs, LRBegin, 270 LREnd)) { 271 llvm::report_fatal_error("initForInfOnly: Liveness error"); 272 return; 273 } 274 275 if (!DefsWithoutUses.empty() || !UsesBeforeDefs.empty()) { 276 if (BuildDefs::dump()) { 277 OstreamLocker L(Ctx); 278 Ostream &Str = Ctx->getStrDump(); 279 for (SizeT VarNum : DefsWithoutUses) { 280 Variable *Var = Vars[VarNum]; 281 Str 
<< "LR def without use, instruction " << LRBegin[VarNum] 282 << ", variable " << Var->getName() << "\n"; 283 } 284 for (SizeT VarNum : UsesBeforeDefs) { 285 Variable *Var = Vars[VarNum]; 286 Str << "LR use before def, instruction " << LREnd[VarNum] 287 << ", variable " << Var->getName() << "\n"; 288 } 289 } 290 llvm::report_fatal_error("initForInfOnly: Liveness error"); 291 } 292 // This isn't actually a fatal condition, but it would be nice to know if we 293 // somehow pre-calculated Unhandled's size wrong. 294 assert(NumVars == 0); 295 296 // Don't build up the list of Kills because we know that no infinite-weight 297 // Variable has a live range spanning a call. 298 Kills.clear(); 299 } 300 301 void LinearScan::initForSecondChance() { 302 TimerMarker T(TimerStack::TT_initUnhandled, Func); 303 FindPreference = true; 304 FindOverlap = true; 305 const VarList &Vars = Func->getVariables(); 306 Unhandled.reserve(Vars.size()); 307 UnhandledPrecolored.reserve(Vars.size()); 308 for (Variable *Var : Vars) { 309 if (Var->isRematerializable()) 310 continue; 311 if (Var->hasReg()) { 312 Var->untrimLiveRange(); 313 Var->setRegNumTmp(Var->getRegNum()); 314 Var->setMustHaveReg(); 315 UnhandledPrecolored.push_back(Var); 316 Unhandled.push_back(Var); 317 } 318 } 319 for (Variable *Var : Evicted) { 320 Var->untrimLiveRange(); 321 Unhandled.push_back(Var); 322 } 323 } 324 325 void LinearScan::init(RegAllocKind Kind, CfgSet<Variable *> ExcludeVars) { 326 this->Kind = Kind; 327 Unhandled.clear(); 328 UnhandledPrecolored.clear(); 329 Handled.clear(); 330 Inactive.clear(); 331 Active.clear(); 332 Vars.clear(); 333 Vars.reserve(Func->getVariables().size() - ExcludeVars.size()); 334 for (auto *Var : Func->getVariables()) { 335 if (ExcludeVars.find(Var) == ExcludeVars.end()) 336 Vars.emplace_back(Var); 337 } 338 339 SizeT NumRegs = Target->getNumRegisters(); 340 RegAliases.resize(NumRegs); 341 for (SizeT Reg = 0; Reg < NumRegs; ++Reg) { 342 RegAliases[Reg] = 
&Target->getAliasesForRegister(RegNumT::fromInt(Reg)); 343 } 344 345 switch (Kind) { 346 case RAK_Unknown: 347 llvm::report_fatal_error("Invalid RAK_Unknown"); 348 break; 349 case RAK_Global: 350 case RAK_Phi: 351 initForGlobal(); 352 break; 353 case RAK_InfOnly: 354 initForInfOnly(); 355 break; 356 case RAK_SecondChance: 357 initForSecondChance(); 358 break; 359 } 360 361 Evicted.clear(); 362 363 auto CompareRanges = [](const Variable *L, const Variable *R) { 364 InstNumberT Lstart = L->getLiveRange().getStart(); 365 InstNumberT Rstart = R->getLiveRange().getStart(); 366 if (Lstart == Rstart) 367 return L->getIndex() < R->getIndex(); 368 return Lstart < Rstart; 369 }; 370 // Do a reverse sort so that erasing elements (from the end) is fast. 371 std::sort(Unhandled.rbegin(), Unhandled.rend(), CompareRanges); 372 std::sort(UnhandledPrecolored.rbegin(), UnhandledPrecolored.rend(), 373 CompareRanges); 374 375 Handled.reserve(Unhandled.size()); 376 Inactive.reserve(Unhandled.size()); 377 Active.reserve(Unhandled.size()); 378 Evicted.reserve(Unhandled.size()); 379 } 380 381 // This is called when Cur must be allocated a register but no registers are 382 // available across Cur's live range. To handle this, we find a register that is 383 // not explicitly used during Cur's live range, spill that register to a stack 384 // location right before Cur's live range begins, and fill (reload) the register 385 // from the stack location right after Cur's live range ends. 386 void LinearScan::addSpillFill(IterationState &Iter) { 387 // Identify the actual instructions that begin and end Iter.Cur's live range. 388 // Iterate through Iter.Cur's node's instruction list until we find the actual 389 // instructions with instruction numbers corresponding to Iter.Cur's recorded 390 // live range endpoints. This sounds inefficient but shouldn't be a problem 391 // in practice because: 392 // (1) This function is almost never called in practice. 
393 // (2) Since this register over-subscription problem happens only for 394 // phi-lowered instructions, the number of instructions in the node is 395 // proportional to the number of phi instructions in the original node, 396 // which is never very large in practice. 397 // (3) We still have to iterate through all instructions of Iter.Cur's live 398 // range to find all explicitly used registers (though the live range is 399 // usually only 2-3 instructions), so the main cost that could be avoided 400 // would be finding the instruction that begin's Iter.Cur's live range. 401 assert(!Iter.Cur->getLiveRange().isEmpty()); 402 InstNumberT Start = Iter.Cur->getLiveRange().getStart(); 403 InstNumberT End = Iter.Cur->getLiveRange().getEnd(); 404 CfgNode *Node = Func->getVMetadata()->getLocalUseNode(Iter.Cur); 405 assert(Node); 406 InstList &Insts = Node->getInsts(); 407 InstList::iterator SpillPoint = Insts.end(); 408 InstList::iterator FillPoint = Insts.end(); 409 // Stop searching after we have found both the SpillPoint and the FillPoint. 410 for (auto I = Insts.begin(), E = Insts.end(); 411 I != E && (SpillPoint == E || FillPoint == E); ++I) { 412 if (I->getNumber() == Start) 413 SpillPoint = I; 414 if (I->getNumber() == End) 415 FillPoint = I; 416 if (SpillPoint != E) { 417 // Remove from RegMask any physical registers referenced during Cur's live 418 // range. Start looking after SpillPoint gets set, i.e. once Cur's live 419 // range begins. 420 FOREACH_VAR_IN_INST(Var, *I) { 421 if (!Var->hasRegTmp()) 422 continue; 423 const auto &Aliases = *RegAliases[Var->getRegNumTmp()]; 424 for (RegNumT RegAlias : RegNumBVIter(Aliases)) { 425 Iter.RegMask[RegAlias] = false; 426 } 427 } 428 } 429 } 430 assert(SpillPoint != Insts.end()); 431 assert(FillPoint != Insts.end()); 432 ++FillPoint; 433 // TODO(stichnot): Randomize instead of *.begin() which maps to find_first(). 
434 const RegNumT RegNum = *RegNumBVIter(Iter.RegMask).begin(); 435 Iter.Cur->setRegNumTmp(RegNum); 436 Variable *Preg = Target->getPhysicalRegister(RegNum, Iter.Cur->getType()); 437 // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that SpillLoc 438 // is correctly identified as !isMultiBlock(), reducing stack frame size. 439 Variable *SpillLoc = Func->makeVariable(Iter.Cur->getType()); 440 // Add "reg=FakeDef;spill=reg" before SpillPoint 441 Target->lowerInst(Node, SpillPoint, InstFakeDef::create(Func, Preg)); 442 Target->lowerInst(Node, SpillPoint, InstAssign::create(Func, SpillLoc, Preg)); 443 // add "reg=spill;FakeUse(reg)" before FillPoint 444 Target->lowerInst(Node, FillPoint, InstAssign::create(Func, Preg, SpillLoc)); 445 Target->lowerInst(Node, FillPoint, InstFakeUse::create(Func, Preg)); 446 } 447 448 void LinearScan::handleActiveRangeExpiredOrInactive(const Variable *Cur) { 449 for (SizeT I = Active.size(); I > 0; --I) { 450 const SizeT Index = I - 1; 451 Variable *Item = Active[Index]; 452 Item->trimLiveRange(Cur->getLiveRange().getStart()); 453 bool Moved = false; 454 if (Item->rangeEndsBefore(Cur)) { 455 // Move Item from Active to Handled list. 456 dumpLiveRangeTrace("Expiring ", Item); 457 moveItem(Active, Index, Handled); 458 Moved = true; 459 } else if (!Item->rangeOverlapsStart(Cur)) { 460 // Move Item from Active to Inactive list. 461 dumpLiveRangeTrace("Inactivating ", Item); 462 moveItem(Active, Index, Inactive); 463 Moved = true; 464 } 465 if (Moved) { 466 // Decrement Item from RegUses[]. 
467 assert(Item->hasRegTmp()); 468 const auto &Aliases = *RegAliases[Item->getRegNumTmp()]; 469 for (RegNumT RegAlias : RegNumBVIter(Aliases)) { 470 --RegUses[RegAlias]; 471 assert(RegUses[RegAlias] >= 0); 472 } 473 } 474 } 475 } 476 477 void LinearScan::handleInactiveRangeExpiredOrReactivated(const Variable *Cur) { 478 for (SizeT I = Inactive.size(); I > 0; --I) { 479 const SizeT Index = I - 1; 480 Variable *Item = Inactive[Index]; 481 Item->trimLiveRange(Cur->getLiveRange().getStart()); 482 if (Item->rangeEndsBefore(Cur)) { 483 // Move Item from Inactive to Handled list. 484 dumpLiveRangeTrace("Expiring ", Item); 485 moveItem(Inactive, Index, Handled); 486 } else if (Item->rangeOverlapsStart(Cur)) { 487 // Move Item from Inactive to Active list. 488 dumpLiveRangeTrace("Reactivating ", Item); 489 moveItem(Inactive, Index, Active); 490 // Increment Item in RegUses[]. 491 assert(Item->hasRegTmp()); 492 const auto &Aliases = *RegAliases[Item->getRegNumTmp()]; 493 for (RegNumT RegAlias : RegNumBVIter(Aliases)) { 494 assert(RegUses[RegAlias] >= 0); 495 ++RegUses[RegAlias]; 496 } 497 } 498 } 499 } 500 501 // Infer register preference and allowable overlap. Only form a preference when 502 // the current Variable has an unambiguous "first" definition. The preference is 503 // some source Variable of the defining instruction that either is assigned a 504 // register that is currently free, or that is assigned a register that is not 505 // free but overlap is allowed. Overlap is allowed when the Variable under 506 // consideration is single-definition, and its definition is a simple assignment 507 // - i.e., the register gets copied/aliased but is never modified. Furthermore, 508 // overlap is only allowed when preferred Variable definition instructions do 509 // not appear within the current Variable's live range. 
void LinearScan::findRegisterPreference(IterationState &Iter) {
  Iter.Prefer = nullptr;
  Iter.PreferReg = RegNumT();
  Iter.AllowOverlap = false;

  if (!FindPreference)
    return;

  VariablesMetadata *VMetadata = Func->getVMetadata();
  // Only form a preference when Cur has an unambiguous first definition
  // within a single block.
  const Inst *DefInst = VMetadata->getFirstDefinitionSingleBlock(Iter.Cur);
  if (DefInst == nullptr)
    return;

  assert(DefInst->getDest() == Iter.Cur);
  const bool IsSingleDefAssign =
      DefInst->isVarAssign() && !VMetadata->isMultiDef(Iter.Cur);
  FOREACH_VAR_IN_INST(SrcVar, *DefInst) {
    // Only consider source variables that have (so far) been assigned a
    // register.
    if (!SrcVar->hasRegTmp())
      continue;

    // That register must be one in the RegMask set, e.g. don't try to prefer
    // the stack pointer as a result of the stacksave intrinsic.
    const auto &Aliases = *RegAliases[SrcVar->getRegNumTmp()];
    const int SrcReg = (Iter.RegMask & Aliases).find_first();
    if (SrcReg == -1)
      continue;

    if (FindOverlap && IsSingleDefAssign && !Iter.Free[SrcReg]) {
      // Don't bother trying to enable AllowOverlap if the register is
      // already free (hence the test on Iter.Free[SrcReg]).
      Iter.AllowOverlap = !overlapsDefs(Func, Iter.Cur, SrcVar);
    }
    if (Iter.AllowOverlap || Iter.Free[SrcReg]) {
      Iter.Prefer = SrcVar;
      Iter.PreferReg = RegNumT::fromInt(SrcReg);
      // Stop looking for a preference after the first valid preference is
      // found. One might think that we should look at all instruction
      // variables to find the best <Prefer,AllowOverlap> combination, but
      // note that AllowOverlap can only be true for a simple assignment
      // statement which can have only one source operand, so it's not
      // possible for AllowOverlap to be true beyond the first source
      // operand.
      FOREACH_VAR_IN_INST_BREAK;
    }
  }
  if (BuildDefs::dump() && Verbose && Iter.Prefer) {
    Ostream &Str = Ctx->getStrDump();
    Str << "Initial Iter.Prefer=";
    Iter.Prefer->dump(Func);
    Str << " R=" << Iter.PreferReg << " LIVE=" << Iter.Prefer->getLiveRange()
        << " Overlap=" << Iter.AllowOverlap << "\n";
  }
}

// Remove registers from the Iter.Free[] list where an Inactive range overlaps
// with the current range.
void LinearScan::filterFreeWithInactiveRanges(IterationState &Iter) {
  for (const Variable *Item : Inactive) {
    if (!Item->rangeOverlaps(Iter.Cur))
      continue;
    const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
    for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
      // Don't assert(Iter.Free[RegAlias]) because in theory (though probably
      // never in practice) there could be two inactive variables that were
      // marked with AllowOverlap.
      Iter.Free[RegAlias] = false;
      Iter.FreeUnfiltered[RegAlias] = false;
      // Disable AllowOverlap if an Inactive variable, which is not Prefer,
      // shares Prefer's register, and has a definition within Cur's live
      // range.
      if (Iter.AllowOverlap && Item != Iter.Prefer &&
          RegAlias == Iter.PreferReg && overlapsDefs(Func, Iter.Cur, Item)) {
        Iter.AllowOverlap = false;
        dumpDisableOverlap(Func, Item, "Inactive");
      }
    }
  }
}

// Remove registers from the Iter.Free[] list where an Unhandled pre-colored
// range overlaps with the current range, and set those registers to infinite
// weight so that they aren't candidates for eviction.
// Cur->rangeEndsBefore(Item) is an early exit check that turns a guaranteed
// O(N^2) algorithm into expected linear complexity.
void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) {
  // TODO(stichnot): Partition UnhandledPrecolored according to register
  // class, to restrict the number of overlap comparisons needed.
  for (Variable *Item : reverse_range(UnhandledPrecolored)) {
    assert(Item->hasReg());
    // UnhandledPrecolored is reverse-sorted by start, so once Cur ends before
    // Item starts, no later Item can overlap either.
    if (Iter.Cur->rangeEndsBefore(Item))
      break;
    if (!Item->rangeOverlaps(Iter.Cur))
      continue;
    const auto &Aliases =
        *RegAliases[Item->getRegNum()]; // Note: not getRegNumTmp()
    for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
      Iter.Weights[RegAlias].setWeight(RegWeight::Inf);
      Iter.Free[RegAlias] = false;
      Iter.FreeUnfiltered[RegAlias] = false;
      Iter.PrecoloredUnhandledMask[RegAlias] = true;
      // Disable Iter.AllowOverlap if the preferred register is one of these
      // pre-colored unhandled overlapping ranges.
      if (Iter.AllowOverlap && RegAlias == Iter.PreferReg) {
        Iter.AllowOverlap = false;
        dumpDisableOverlap(Func, Item, "PrecoloredUnhandled");
      }
    }
  }
}

// Commits a pre-colored variable to its fixed register: puts it on the Active
// list, bumps the use counts of the register and all its aliases, and pops it
// from UnhandledPrecolored.
void LinearScan::allocatePrecoloredRegister(Variable *Cur) {
  const auto RegNum = Cur->getRegNum();
  // RegNumTmp should have already been set above.
  assert(Cur->getRegNumTmp() == RegNum);
  dumpLiveRangeTrace("Precoloring ", Cur);
  Active.push_back(Cur);
  const auto &Aliases = *RegAliases[RegNum];
  for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
    assert(RegUses[RegAlias] >= 0);
    ++RegUses[RegAlias];
  }
  // Cur must be the next pre-colored range, since both lists are sorted by
  // live-range start.
  assert(!UnhandledPrecolored.empty());
  assert(UnhandledPrecolored.back() == Cur);
  UnhandledPrecolored.pop_back();
}

// Assigns the previously computed preferred register to Cur and activates it.
void LinearScan::allocatePreferredRegister(IterationState &Iter) {
  Iter.Cur->setRegNumTmp(Iter.PreferReg);
  dumpLiveRangeTrace("Preferring ", Iter.Cur);
  const auto &Aliases = *RegAliases[Iter.PreferReg];
  for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
    assert(RegUses[RegAlias] >= 0);
    ++RegUses[RegAlias];
  }
  Active.push_back(Iter.Cur);
}

// Assigns the first available register from Iter.Free (Filtered) or
// Iter.FreeUnfiltered (the reserve set) to Cur and activates it.
void LinearScan::allocateFreeRegister(IterationState &Iter, bool Filtered) {
  const RegNumT RegNum =
      *RegNumBVIter(Filtered ? Iter.Free : Iter.FreeUnfiltered).begin();
  Iter.Cur->setRegNumTmp(RegNum);
  if (Filtered)
    dumpLiveRangeTrace("Allocating Y ", Iter.Cur);
  else
    dumpLiveRangeTrace("Allocating X ", Iter.Cur);
  const auto &Aliases = *RegAliases[RegNum];
  for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
    assert(RegUses[RegAlias] >= 0);
    ++RegUses[RegAlias];
  }
  Active.push_back(Iter.Cur);
}

// Handles the case where no register is free across Cur's live range: either
// Cur loses (goes to Handled without a register), a register is force-freed
// via spill/fill (RAK_Phi), a reserve register is used, or the lowest-weight
// register's current holders are evicted and the register given to Cur.
void LinearScan::handleNoFreeRegisters(IterationState &Iter) {
  // Check Active ranges.
  for (const Variable *Item : Active) {
    assert(Item->rangeOverlaps(Iter.Cur));
    assert(Item->hasRegTmp());
    const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
    // We add the Item's weight to each alias/subregister to represent that,
    // should we decide to pick any of them, then we would incur that many
    // memory accesses.
    RegWeight W = Item->getWeight(Func);
    for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
      Iter.Weights[RegAlias].addWeight(W);
    }
  }
  // Same as above, but check Inactive ranges instead of Active.
  for (const Variable *Item : Inactive) {
    if (!Item->rangeOverlaps(Iter.Cur))
      continue;
    assert(Item->hasRegTmp());
    const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
    RegWeight W = Item->getWeight(Func);
    for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
      Iter.Weights[RegAlias].addWeight(W);
    }
  }

  // All the weights are now calculated. Find the register with smallest
  // weight.
  int32_t MinWeightIndex = findMinWeightIndex(Iter.RegMask, Iter.Weights);

  if (MinWeightIndex < 0 ||
      Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
    if (!Iter.Cur->mustHaveReg()) {
      // Iter.Cur doesn't have priority over any other live ranges, so don't
      // allocate any register to it, and move it to the Handled state.
      Handled.push_back(Iter.Cur);
      return;
    }
    if (Kind == RAK_Phi) {
      // Iter.Cur is infinite-weight but all physical registers are already
      // taken, so we need to force one to be temporarily available.
      addSpillFill(Iter);
      Handled.push_back(Iter.Cur);
      return;
    }
    // The remaining portion of the enclosing "if" block should only be
    // reachable if we are manually limiting physical registers for testing.
    if (UseReserve) {
      if (Iter.FreeUnfiltered.any()) {
        // There is some available physical register held in reserve, so use
        // it.
        constexpr bool NotFiltered = false;
        allocateFreeRegister(Iter, NotFiltered);
        // Iter.Cur is now on the Active list.
        return;
      }
      // At this point, we need to find some reserve register that is already
      // assigned to a non-infinite-weight variable. This could happen if
      // some variable was previously assigned an alias of such a register.
      MinWeightIndex =
          findMinWeightIndex(Iter.RegMaskUnfiltered, Iter.Weights);
    }
    if (Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
      dumpLiveRangeTrace("Failing ", Iter.Cur);
      Func->setError("Unable to find a physical register for an "
                     "infinite-weight live range "
                     "(consider using -reg-reserve): " +
                     Iter.Cur->getName());
      Handled.push_back(Iter.Cur);
      return;
    }
    // At this point, MinWeightIndex points to a valid reserve register to
    // reallocate to Iter.Cur, so drop into the eviction code.
  }

  // Evict all live ranges in Active that register number MinWeightIndex is
  // assigned to.
  const auto &Aliases = *RegAliases[MinWeightIndex];
  for (SizeT I = Active.size(); I > 0; --I) {
    const SizeT Index = I - 1;
    Variable *Item = Active[Index];
    const auto RegNum = Item->getRegNumTmp();
    if (Aliases[RegNum]) {
      dumpLiveRangeTrace("Evicting A ", Item);
      const auto &Aliases = *RegAliases[RegNum];
      for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
        --RegUses[RegAlias];
        assert(RegUses[RegAlias] >= 0);
      }
      Item->setRegNumTmp(RegNumT());
      moveItem(Active, Index, Handled);
      Evicted.push_back(Item);
    }
  }
  // Do the same for Inactive.
  for (SizeT I = Inactive.size(); I > 0; --I) {
    const SizeT Index = I - 1;
    Variable *Item = Inactive[Index];
    // Note: The Item->rangeOverlaps(Cur) clause is not part of the
    // description of AssignMemLoc() in the original paper. But there doesn't
    // seem to be any need to evict an inactive live range that doesn't
    // overlap with the live range currently being considered. It's
    // especially bad if we would end up evicting an infinite-weight but
    // currently-inactive live range. The most common situation for this
    // would be a scratch register kill set for call instructions.
    if (Aliases[Item->getRegNumTmp()] && Item->rangeOverlaps(Iter.Cur)) {
      dumpLiveRangeTrace("Evicting I ", Item);
      Item->setRegNumTmp(RegNumT());
      moveItem(Inactive, Index, Handled);
      Evicted.push_back(Item);
    }
  }
  // Assign the register to Cur.
  Iter.Cur->setRegNumTmp(RegNumT::fromInt(MinWeightIndex));
  for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
    assert(RegUses[RegAlias] >= 0);
    ++RegUses[RegAlias];
  }
  Active.push_back(Iter.Cur);
  dumpLiveRangeTrace("Allocating Z ", Iter.Cur);
}

// Commits the temporary register assignments (RegNumTmp) to the final RegNum
// for every Handled variable, optionally routing them through a random
// permutation of the register set.
void LinearScan::assignFinalRegisters(const SmallBitVector &RegMaskFull,
                                      const SmallBitVector &PreDefinedRegisters,
                                      bool Randomized) {
  const size_t NumRegisters = RegMaskFull.size();
  llvm::SmallVector<RegNumT, REGS_SIZE> Permutation(NumRegisters);
  if (Randomized) {
    // Create a random number generator for regalloc randomization. Merge
    // function's sequence and Kind value as the Salt. Because regAlloc() is
    // called twice under O2, the second time with RAK_Phi, we check Kind ==
    // RAK_Phi to determine the lowest-order bit to make sure the Salt is
    // different.
    uint64_t Salt =
        (Func->getSequenceNumber() << 1) ^ (Kind == RAK_Phi ? 0u : 1u);
    Target->makeRandomRegisterPermutation(
        Permutation, PreDefinedRegisters | ~RegMaskFull, Salt);
  }

  // Finish up by setting RegNum = RegNumTmp (or a random permutation thereof)
  // for each Variable.
  for (Variable *Item : Handled) {
    const auto RegNum = Item->getRegNumTmp();
    auto AssignedRegNum = RegNum;

    // Pre-colored registers are never permuted.
    if (Randomized && Item->hasRegTmp() && !Item->hasReg()) {
      AssignedRegNum = Permutation[RegNum];
    }
    if (BuildDefs::dump() && Verbose) {
      Ostream &Str = Ctx->getStrDump();
      if (!Item->hasRegTmp()) {
        Str << "Not assigning ";
        Item->dump(Func);
        Str << "\n";
      } else {
        Str << (AssignedRegNum == Item->getRegNum() ? "Reassigning "
                                                    : "Assigning ")
            << Target->getRegName(AssignedRegNum, Item->getType()) << "(r"
            << AssignedRegNum << ") to ";
        Item->dump(Func);
        Str << "\n";
      }
    }
    Item->setRegNum(AssignedRegNum);
  }
}

// Implements the linear-scan algorithm.
// Based on "Linear Scan Register
// Allocation in the Context of SSA Form and Register Constraints" by Hanspeter
// Mössenböck and Michael Pfeiffer,
// ftp://ftp.ssw.uni-linz.ac.at/pub/Papers/Moe02.PDF. This implementation is
// modified to take affinity into account and allow two interfering variables to
// share the same register in certain cases.
//
// Requires running Cfg::liveness(Liveness_Intervals) in preparation. Results
// are assigned to Variable::RegNum for each Variable.
void LinearScan::scan(const SmallBitVector &RegMaskFull, bool Randomized) {
  TimerMarker T(TimerStack::TT_linearScan, Func);
  assert(RegMaskFull.any()); // Sanity check
  // Hold the dump-stream lock across the whole scan so verbose trace output
  // is not interleaved with other threads' output.
  if (Verbose)
    Ctx->lockStr();
  Func->resetCurrentNode();
  const size_t NumRegisters = RegMaskFull.size();
  SmallBitVector PreDefinedRegisters(NumRegisters);
  if (Randomized) {
    // Record precolored registers so the later random permutation maps them
    // to themselves (see assignFinalRegisters()).
    for (Variable *Var : UnhandledPrecolored) {
      PreDefinedRegisters[Var->getRegNum()] = true;
    }
  }

  // Build a LiveRange representing the Kills list.
  LiveRange KillsRange(Kills);
  KillsRange.untrim();

  // Reset the register use count.
  RegUses.resize(NumRegisters);
  std::fill(RegUses.begin(), RegUses.end(), 0);

  // Unhandled is already set to all ranges in increasing order of start points.
  assert(Active.empty());
  assert(Inactive.empty());
  assert(Handled.empty());
  // Scratch (caller-save) registers are clobbered across calls; KillsMask is
  // used below to exclude them from ranges that overlap a kill point.
  const TargetLowering::RegSetMask RegsInclude =
      TargetLowering::RegSet_CallerSave;
  const TargetLowering::RegSetMask RegsExclude = TargetLowering::RegSet_None;
  const SmallBitVector KillsMask =
      Target->getRegisterSet(RegsInclude, RegsExclude);

  // Allocate memory once outside the loop.
  IterationState Iter;
  Iter.Weights.reserve(NumRegisters);
  Iter.PrecoloredUnhandledMask.reserve(NumRegisters);

  // Main loop: pop the next live range (Unhandled is sorted so the earliest
  // start point is at the back) and find it a register.
  while (!Unhandled.empty()) {
    Iter.Cur = Unhandled.back();
    Unhandled.pop_back();
    dumpLiveRangeTrace("\nConsidering ", Iter.Cur);
    if (UseReserve)
      assert(Target->getAllRegistersForVariable(Iter.Cur).any());
    else
      assert(Target->getRegistersForVariable(Iter.Cur).any());
    // RegMask is the normal candidate set; RegMaskUnfiltered additionally
    // includes the reserve registers for this variable's class.
    Iter.RegMask = RegMaskFull & Target->getRegistersForVariable(Iter.Cur);
    Iter.RegMaskUnfiltered =
        RegMaskFull & Target->getAllRegistersForVariable(Iter.Cur);
    // Trim the kills range up to Cur's start to keep overlap tests cheap.
    KillsRange.trim(Iter.Cur->getLiveRange().getStart());

    // Check for pre-colored ranges. If Cur is pre-colored, it definitely gets
    // that register. Previously processed live ranges would have avoided that
    // register due to it being pre-colored. Future processed live ranges won't
    // evict that register because the live range has infinite weight.
    if (Iter.Cur->hasReg()) {
      allocatePrecoloredRegister(Iter.Cur);
      continue;
    }

    // Retire or deactivate ranges relative to Cur's start point.
    handleActiveRangeExpiredOrInactive(Iter.Cur);
    handleInactiveRangeExpiredOrReactivated(Iter.Cur);

    // Calculate available registers into Iter.Free[] and Iter.FreeUnfiltered[].
    Iter.Free = Iter.RegMask;
    Iter.FreeUnfiltered = Iter.RegMaskUnfiltered;
    for (SizeT i = 0; i < Iter.RegMask.size(); ++i) {
      if (RegUses[i] > 0) {
        Iter.Free[i] = false;
        Iter.FreeUnfiltered[i] = false;
      }
    }

    findRegisterPreference(Iter);
    filterFreeWithInactiveRanges(Iter);

    // Disable AllowOverlap if an Active variable, which is not Prefer, shares
    // Prefer's register, and has a definition within Cur's live range.
    if (Iter.AllowOverlap) {
      const auto &Aliases = *RegAliases[Iter.PreferReg];
      for (const Variable *Item : Active) {
        const RegNumT RegNum = Item->getRegNumTmp();
        if (Item != Iter.Prefer && Aliases[RegNum] &&
            overlapsDefs(Func, Iter.Cur, Item)) {
          Iter.AllowOverlap = false;
          dumpDisableOverlap(Func, Item, "Active");
        }
      }
    }

    // Reset per-iteration eviction weights and the precolored-conflict mask.
    Iter.Weights.resize(Iter.RegMask.size());
    std::fill(Iter.Weights.begin(), Iter.Weights.end(), RegWeight());

    Iter.PrecoloredUnhandledMask.resize(Iter.RegMask.size());
    Iter.PrecoloredUnhandledMask.reset();

    filterFreeWithPrecoloredRanges(Iter);

    // Remove scratch registers from the Iter.Free[] list, and mark their
    // Iter.Weights[] as infinite, if KillsRange overlaps Cur's live range.
    constexpr bool UseTrimmed = true;
    if (Iter.Cur->getLiveRange().overlaps(KillsRange, UseTrimmed)) {
      Iter.Free.reset(KillsMask);
      Iter.FreeUnfiltered.reset(KillsMask);
      for (RegNumT i : RegNumBVIter(KillsMask)) {
        Iter.Weights[i].setWeight(RegWeight::Inf);
        if (Iter.PreferReg == i)
          Iter.AllowOverlap = false;
      }
    }

    // Print info about physical register availability.
    if (BuildDefs::dump() && Verbose) {
      Ostream &Str = Ctx->getStrDump();
      for (RegNumT i : RegNumBVIter(Iter.RegMaskUnfiltered)) {
        Str << Target->getRegName(i, Iter.Cur->getType()) << "(U=" << RegUses[i]
            << ",F=" << Iter.Free[i] << ",P=" << Iter.PrecoloredUnhandledMask[i]
            << ") ";
      }
      Str << "\n";
    }

    if (Iter.Prefer && (Iter.AllowOverlap || Iter.Free[Iter.PreferReg])) {
      // First choice: a preferred register that is either free or is allowed to
      // overlap with its linked variable.
      allocatePreferredRegister(Iter);
    } else if (Iter.Free.any()) {
      // Second choice: any free register.
      constexpr bool Filtered = true;
      allocateFreeRegister(Iter, Filtered);
    } else {
      // Fallback: there are no free registers, so we look for the lowest-weight
      // register and see if Cur has higher weight.
      handleNoFreeRegisters(Iter);
    }
    dump(Func);
  }

  // Move anything Active or Inactive to Handled for easier handling.
  Handled.insert(Handled.end(), Active.begin(), Active.end());
  Active.clear();
  Handled.insert(Handled.end(), Inactive.begin(), Inactive.end());
  Inactive.clear();
  dump(Func);

  assignFinalRegisters(RegMaskFull, PreDefinedRegisters, Randomized);

  if (Verbose)
    Ctx->unlockStr();
}

// ======================== Dump routines ======================== //

// Prints Label followed by Item's register/live-range summary (one line) to
// the dump stream, when verbose linear-scan dumping is enabled. No-op in
// non-dump builds.
void LinearScan::dumpLiveRangeTrace(const char *Label, const Variable *Item) {
  if (!BuildDefs::dump())
    return;

  if (Verbose) {
    Ostream &Str = Ctx->getStrDump();
    Str << Label;
    dumpLiveRange(Item, Func);
    Str << "\n";
  }
}

// Dumps the full allocator state — the Handled, Unhandled, Active, and
// Inactive lists — one live range per line. Unhandled is printed in reverse
// so ranges appear in increasing order of start point (the list is kept
// sorted with the earliest start at the back).
void LinearScan::dump(Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  if (!Verbose)
    return;
  Ostream &Str = Func->getContext()->getStrDump();
  Func->resetCurrentNode();
  Str << "**** Current regalloc state:\n";
  Str << "++++++ Handled:\n";
  for (const Variable *Item : Handled) {
    dumpLiveRange(Item, Func);
    Str << "\n";
  }
  Str << "++++++ Unhandled:\n";
  for (const Variable *Item : reverse_range(Unhandled)) {
    dumpLiveRange(Item, Func);
    Str << "\n";
  }
  Str << "++++++ Active:\n";
  for (const Variable *Item : Active) {
    dumpLiveRange(Item, Func);
    Str << "\n";
  }
  Str << "++++++ Inactive:\n";
  for (const Variable *Item : Inactive) {
    dumpLiveRange(Item, Func);
    Str << "\n";
  }
}

} // end of namespace Ice