      1 //===- subzero/src/IceRegAlloc.cpp - Linear-scan implementation -----------===//
      2 //
      3 //                        The Subzero Code Generator
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 ///
     10 /// \file
     11 /// \brief Implements the LinearScan class, which performs the linear-scan
     12 /// register allocation after liveness analysis has been performed.
     13 ///
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "IceRegAlloc.h"
     17 
     18 #include "IceBitVector.h"
     19 #include "IceCfg.h"
     20 #include "IceCfgNode.h"
     21 #include "IceInst.h"
     22 #include "IceInstVarIter.h"
     23 #include "IceOperand.h"
     24 #include "IceTargetLowering.h"
     25 
     26 #include "llvm/Support/Format.h"
     27 
     28 namespace Ice {
     29 
     30 namespace {
     31 
     32 // Returns true if Var has any definitions within Item's live range.
     33 // TODO(stichnot): Consider trimming the Definitions list similar to how the
     34 // live ranges are trimmed, since all the overlapsDefs() tests are whether some
     35 // variable's definitions overlap Cur, and trimming is with respect to
     36 // Cur.start. Initial tests show no measurable performance difference, so
     37 // we'll keep the code simple for now.
     38 bool overlapsDefs(const Cfg *Func, const Variable *Item, const Variable *Var) {
     39   constexpr bool UseTrimmed = true;
     40   VariablesMetadata *VMetadata = Func->getVMetadata();
     41   if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var))
     42     if (Item->getLiveRange().overlapsInst(FirstDef->getNumber(), UseTrimmed))
     43       return true;
     44   for (const Inst *Def : VMetadata->getLatterDefinitions(Var)) {
     45     if (Item->getLiveRange().overlapsInst(Def->getNumber(), UseTrimmed))
     46       return true;
     47   }
     48   return false;
     49 }
     50 
     51 void dumpDisableOverlap(const Cfg *Func, const Variable *Var,
     52                         const char *Reason) {
     53   if (!BuildDefs::dump())
     54     return;
     55   if (!Func->isVerbose(IceV_LinearScan))
     56     return;
     57 
     58   VariablesMetadata *VMetadata = Func->getVMetadata();
     59   Ostream &Str = Func->getContext()->getStrDump();
     60   Str << "Disabling Overlap due to " << Reason << " " << *Var
     61       << " LIVE=" << Var->getLiveRange() << " Defs=";
     62   if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var))
     63     Str << FirstDef->getNumber();
     64   const InstDefList &Defs = VMetadata->getLatterDefinitions(Var);
     65   for (size_t i = 0; i < Defs.size(); ++i) {
     66     Str << "," << Defs[i]->getNumber();
     67   }
     68   Str << "\n";
     69 }
     70 
     71 void dumpLiveRange(const Variable *Var, const Cfg *Func) {
     72   if (!BuildDefs::dump())
     73     return;
     74   Ostream &Str = Func->getContext()->getStrDump();
     75   Str << "R=";
     76   if (Var->hasRegTmp()) {
     77     Str << llvm::format("%2d", int(Var->getRegNumTmp()));
     78   } else {
     79     Str << "NA";
     80   }
     81   Str << "  V=";
     82   Var->dump(Func);
     83   Str << "  Range=" << Var->getLiveRange();
     84 }
     85 
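        // Returns the index of the lowest-weight register among those set in RegMask,
        // or -1 if RegMask has no registers set (tolerated only when registers are
        // being held in reserve via -reg-reserve).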
     86 int32_t findMinWeightIndex(
     87     const SmallBitVector &RegMask,
     88     const llvm::SmallVector<RegWeight, LinearScan::REGS_SIZE> &Weights) {
     89   int MinWeightIndex = -1;
     90   for (RegNumT i : RegNumBVIter(RegMask)) {
     91     if (MinWeightIndex < 0 || Weights[i] < Weights[MinWeightIndex])
     92       MinWeightIndex = i;
     93   }
     94   assert(getFlags().getRegAllocReserve() || MinWeightIndex >= 0);
     95   return MinWeightIndex;
     96 }
     97 
     98 } // end of anonymous namespace
     99 
    100 LinearScan::LinearScan(Cfg *Func)
    101     : Func(Func), Ctx(Func->getContext()), Target(Func->getTarget()),
    102       Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)),
    103       UseReserve(getFlags().getRegAllocReserve()) {}
    104 
    105 // Prepare for full register allocation of all variables. We depend on liveness
    106 // analysis to have calculated live ranges.
    107 void LinearScan::initForGlobal() {
    108   TimerMarker T(TimerStack::TT_initUnhandled, Func);
    109   FindPreference = true;
    110   // For full register allocation, normally we want to enable FindOverlap
    111   // (meaning we look for opportunities for two overlapping live ranges to
    112   // safely share the same register). However, we disable it for phi-lowering
    113   // register allocation since no overlap opportunities should be available and
    114   // it's more expensive to look for opportunities.
    115   FindOverlap = (Kind != RAK_Phi);
    116   Unhandled.reserve(Vars.size());
    117   UnhandledPrecolored.reserve(Vars.size());
    118   // Gather the live ranges of all variables and add them to the Unhandled set.
    119   for (Variable *Var : Vars) {
    120     // Don't consider rematerializable variables.
    121     if (Var->isRematerializable())
    122       continue;
    123     // Explicitly don't consider zero-weight variables, which are meant to be
    124     // spill slots.
    125     if (Var->mustNotHaveReg())
    126       continue;
    127     // Don't bother if the variable has a null live range, which means it was
    128     // never referenced.
    129     if (Var->getLiveRange().isEmpty())
    130       continue;
    131     Var->untrimLiveRange();
    132     Unhandled.push_back(Var);
    133     if (Var->hasReg()) {
    134       Var->setRegNumTmp(Var->getRegNum());
    135       Var->setMustHaveReg();
    136       UnhandledPrecolored.push_back(Var);
    137     }
    138   }
    139 
    140   // Build the (ordered) list of FakeKill instruction numbers.
    141   Kills.clear();
    142   // Phi lowering should not be creating new call instructions, so there should
    143   // be no infinite-weight not-yet-colored live ranges that span a call
    144   // instruction, hence no need to construct the Kills list.
    145   if (Kind == RAK_Phi)
    146     return;
    147   for (CfgNode *Node : Func->getNodes()) {
    148     for (Inst &I : Node->getInsts()) {
    149       if (auto *Kill = llvm::dyn_cast<InstFakeKill>(&I)) {
    150         if (!Kill->isDeleted() && !Kill->getLinked()->isDeleted())
    151           Kills.push_back(I.getNumber());
    152       }
    153     }
    154   }
    155 }
    156 
    157 // Validate the integrity of the live ranges.  If there are any errors, it
    158 // prints details and returns false.  On success, it returns true.
    159 bool LinearScan::livenessValidateIntervals(
    160     const DefUseErrorList &DefsWithoutUses,
    161     const DefUseErrorList &UsesBeforeDefs,
    162     const CfgVector<InstNumberT> &LRBegin,
    163     const CfgVector<InstNumberT> &LREnd) const {
    164   if (DefsWithoutUses.empty() && UsesBeforeDefs.empty())
    165     return true;
    166 
    167   if (!BuildDefs::dump())
    168     return false;
    169 
    170   OstreamLocker L(Ctx);
    171   Ostream &Str = Ctx->getStrDump();
    172   for (SizeT VarNum : DefsWithoutUses) {
    173     Variable *Var = Vars[VarNum];
    174     Str << "LR def without use, instruction " << LRBegin[VarNum]
    175         << ", variable " << Var->getName() << "\n";
    176   }
    177   for (SizeT VarNum : UsesBeforeDefs) {
    178     Variable *Var = Vars[VarNum];
    179     Str << "LR use before def, instruction " << LREnd[VarNum] << ", variable "
    180         << Var->getName() << "\n";
    181   }
    182   return false;
    183 }
    184 
    185 // Prepare for very simple register allocation of only infinite-weight Variables
    186 // while respecting pre-colored Variables. Some properties we take advantage of:
    187 //
    188 // * Live ranges of interest consist of a single segment.
    189 //
    190 // * Live ranges of interest never span a call instruction.
    191 //
    192 // * Phi instructions are not considered because either phis have already been
    193 //   lowered, or they don't contain any pre-colored or infinite-weight
    194 //   Variables.
    195 //
    196 // * We don't need to renumber instructions before computing live ranges because
    197 //   all the high-level ICE instructions are deleted prior to lowering, and the
    198 //   low-level instructions are added in monotonically increasing order.
    199 //
    200 // * There are no opportunities for register preference or allowing overlap.
    201 //
    202 // Some properties we aren't (yet) taking advantage of:
    203 //
    204 // * Because live ranges are a single segment, the Inactive set will always be
    205 //   empty, and the live range trimming operation is unnecessary.
    206 //
    207 // * Calculating overlap of single-segment live ranges could be optimized a bit.
    208 void LinearScan::initForInfOnly() {
    209   TimerMarker T(TimerStack::TT_initUnhandled, Func);
    210   FindPreference = false;
    211   FindOverlap = false;
    212   SizeT NumVars = 0;
    213 
    214   // Iterate across all instructions and record the begin and end of the live
    215   // range for each variable that is pre-colored or infinite weight.
    216   CfgVector<InstNumberT> LRBegin(Vars.size(), Inst::NumberSentinel);
    217   CfgVector<InstNumberT> LREnd(Vars.size(), Inst::NumberSentinel);
    218   DefUseErrorList DefsWithoutUses, UsesBeforeDefs;
    219   for (CfgNode *Node : Func->getNodes()) {
    220     for (Inst &Instr : Node->getInsts()) {
    221       if (Instr.isDeleted())
    222         continue;
    223       FOREACH_VAR_IN_INST(Var, Instr) {
    224         if (Var->getIgnoreLiveness())
    225           continue;
    226         if (Var->hasReg() || Var->mustHaveReg()) {
    227           SizeT VarNum = Var->getIndex();
    228           LREnd[VarNum] = Instr.getNumber();
    229           if (!Var->getIsArg() && LRBegin[VarNum] == Inst::NumberSentinel)
    230             UsesBeforeDefs.push_back(VarNum);
    231         }
    232       }
    233       if (const Variable *Var = Instr.getDest()) {
    234         if (!Var->getIgnoreLiveness() &&
    235             (Var->hasReg() || Var->mustHaveReg())) {
    236           if (LRBegin[Var->getIndex()] == Inst::NumberSentinel) {
    237             LRBegin[Var->getIndex()] = Instr.getNumber();
    238             ++NumVars;
    239           }
    240         }
    241       }
    242     }
    243   }
    244 
    245   Unhandled.reserve(NumVars);
    246   UnhandledPrecolored.reserve(NumVars);
    247   for (SizeT i = 0; i < Vars.size(); ++i) {
    248     Variable *Var = Vars[i];
    249     if (Var->isRematerializable())
    250       continue;
    251     if (LRBegin[i] != Inst::NumberSentinel) {
    252       if (LREnd[i] == Inst::NumberSentinel) {
    253         DefsWithoutUses.push_back(i);
    254         continue;
    255       }
    256       Unhandled.push_back(Var);
    257       Var->resetLiveRange();
    258       Var->addLiveRange(LRBegin[i], LREnd[i]);
    259       Var->untrimLiveRange();
    260       if (Var->hasReg()) {
    261         Var->setRegNumTmp(Var->getRegNum());
    262         Var->setMustHaveReg();
    263         UnhandledPrecolored.push_back(Var);
    264       }
    265       --NumVars;
    266     }
    267   }
    268 
    269   if (!livenessValidateIntervals(DefsWithoutUses, UsesBeforeDefs, LRBegin,
    270                                  LREnd)) {
    271     llvm::report_fatal_error("initForInfOnly: Liveness error");
    272     return;
    273   }
    274 
    292   // This isn't actually a fatal condition, but it would be nice to know if we
    293   // somehow pre-calculated Unhandled's size wrong.
    294   assert(NumVars == 0);
    295 
    296   // Don't build up the list of Kills because we know that no infinite-weight
    297   // Variable has a live range spanning a call.
    298   Kills.clear();
    299 }
    300 
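        // Prepare for a second-chance allocation round: every variable that already
        // holds a register is re-entered as pre-colored, and every variable evicted
        // in the previous round is added back to the Unhandled set.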
    301 void LinearScan::initForSecondChance() {
    302   TimerMarker T(TimerStack::TT_initUnhandled, Func);
    303   FindPreference = true;
    304   FindOverlap = true;
    305   const VarList &Vars = Func->getVariables();
    306   Unhandled.reserve(Vars.size());
    307   UnhandledPrecolored.reserve(Vars.size());
    308   for (Variable *Var : Vars) {
    309     if (Var->isRematerializable())
    310       continue;
    311     if (Var->hasReg()) {
    312       Var->untrimLiveRange();
    313       Var->setRegNumTmp(Var->getRegNum());
    314       Var->setMustHaveReg();
    315       UnhandledPrecolored.push_back(Var);
    316       Unhandled.push_back(Var);
    317     }
    318   }
    319   for (Variable *Var : Evicted) {
    320     Var->untrimLiveRange();
    321     Unhandled.push_back(Var);
    322   }
    323 }
    324 
    325 void LinearScan::init(RegAllocKind Kind, CfgSet<Variable *> ExcludeVars) {
    326   this->Kind = Kind;
    327   Unhandled.clear();
    328   UnhandledPrecolored.clear();
    329   Handled.clear();
    330   Inactive.clear();
    331   Active.clear();
    332   Vars.clear();
    333   Vars.reserve(Func->getVariables().size() - ExcludeVars.size());
    334   for (auto *Var : Func->getVariables()) {
    335     if (ExcludeVars.find(Var) == ExcludeVars.end())
    336       Vars.emplace_back(Var);
    337   }
    338 
    339   SizeT NumRegs = Target->getNumRegisters();
    340   RegAliases.resize(NumRegs);
    341   for (SizeT Reg = 0; Reg < NumRegs; ++Reg) {
    342     RegAliases[Reg] = &Target->getAliasesForRegister(RegNumT::fromInt(Reg));
    343   }
    344 
    345   switch (Kind) {
    346   case RAK_Unknown:
    347     llvm::report_fatal_error("Invalid RAK_Unknown");
    348     break;
    349   case RAK_Global:
    350   case RAK_Phi:
    351     initForGlobal();
    352     break;
    353   case RAK_InfOnly:
    354     initForInfOnly();
    355     break;
    356   case RAK_SecondChance:
    357     initForSecondChance();
    358     break;
    359   }
    360 
    361   Evicted.clear();
    362 
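          // Order live ranges by start point, breaking ties by variable index so
          // that the sort is deterministic.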
    363   auto CompareRanges = [](const Variable *L, const Variable *R) {
    364     InstNumberT Lstart = L->getLiveRange().getStart();
    365     InstNumberT Rstart = R->getLiveRange().getStart();
    366     if (Lstart == Rstart)
    367       return L->getIndex() < R->getIndex();
    368     return Lstart < Rstart;
    369   };
    370   // Do a reverse sort so that erasing elements (from the end) is fast.
    371   std::sort(Unhandled.rbegin(), Unhandled.rend(), CompareRanges);
    372   std::sort(UnhandledPrecolored.rbegin(), UnhandledPrecolored.rend(),
    373             CompareRanges);
    374 
    375   Handled.reserve(Unhandled.size());
    376   Inactive.reserve(Unhandled.size());
    377   Active.reserve(Unhandled.size());
    378   Evicted.reserve(Unhandled.size());
    379 }
    380 
    381 // This is called when Cur must be allocated a register but no registers are
    382 // available across Cur's live range. To handle this, we find a register that is
    383 // not explicitly used during Cur's live range, spill that register to a stack
    384 // location right before Cur's live range begins, and fill (reload) the register
    385 // from the stack location right after Cur's live range ends.
    386 void LinearScan::addSpillFill(IterationState &Iter) {
    387   // Identify the actual instructions that begin and end Iter.Cur's live range.
    388   // Iterate through Iter.Cur's node's instruction list until we find the actual
    389   // instructions with instruction numbers corresponding to Iter.Cur's recorded
    390   // live range endpoints.  This sounds inefficient but shouldn't be a problem
    391   // in practice because:
    392   // (1) This function is almost never called in practice.
    393   // (2) Since this register over-subscription problem happens only for
    394   //     phi-lowered instructions, the number of instructions in the node is
    395   //     proportional to the number of phi instructions in the original node,
    396   //     which is never very large in practice.
    397   // (3) We still have to iterate through all instructions of Iter.Cur's live
    398   //     range to find all explicitly used registers (though the live range is
    399   //     usually only 2-3 instructions), so the main cost that could be avoided
    400 //     would be finding the instruction that begins Iter.Cur's live range.
    401   assert(!Iter.Cur->getLiveRange().isEmpty());
    402   InstNumberT Start = Iter.Cur->getLiveRange().getStart();
    403   InstNumberT End = Iter.Cur->getLiveRange().getEnd();
    404   CfgNode *Node = Func->getVMetadata()->getLocalUseNode(Iter.Cur);
    405   assert(Node);
    406   InstList &Insts = Node->getInsts();
    407   InstList::iterator SpillPoint = Insts.end();
    408   InstList::iterator FillPoint = Insts.end();
    409   // Stop searching after we have found both the SpillPoint and the FillPoint.
    410   for (auto I = Insts.begin(), E = Insts.end();
    411        I != E && (SpillPoint == E || FillPoint == E); ++I) {
    412     if (I->getNumber() == Start)
    413       SpillPoint = I;
    414     if (I->getNumber() == End)
    415       FillPoint = I;
    416     if (SpillPoint != E) {
    417       // Remove from RegMask any physical registers referenced during Cur's live
    418       // range. Start looking after SpillPoint gets set, i.e. once Cur's live
    419       // range begins.
    420       FOREACH_VAR_IN_INST(Var, *I) {
    421         if (!Var->hasRegTmp())
    422           continue;
    423         const auto &Aliases = *RegAliases[Var->getRegNumTmp()];
    424         for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
    425           Iter.RegMask[RegAlias] = false;
    426         }
    427       }
    428     }
    429   }
    430   assert(SpillPoint != Insts.end());
    431   assert(FillPoint != Insts.end());
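          // Advance FillPoint past the instruction that ends Cur's live range so
          // that the reload sequence is inserted after it.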
    432   ++FillPoint;
    433   // TODO(stichnot): Randomize instead of *.begin() which maps to find_first().
    434   const RegNumT RegNum = *RegNumBVIter(Iter.RegMask).begin();
    435   Iter.Cur->setRegNumTmp(RegNum);
    436   Variable *Preg = Target->getPhysicalRegister(RegNum, Iter.Cur->getType());
    437   // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that SpillLoc
    438   // is correctly identified as !isMultiBlock(), reducing stack frame size.
    439   Variable *SpillLoc = Func->makeVariable(Iter.Cur->getType());
    440   // Add "reg=FakeDef;spill=reg" before SpillPoint
    441   Target->lowerInst(Node, SpillPoint, InstFakeDef::create(Func, Preg));
    442   Target->lowerInst(Node, SpillPoint, InstAssign::create(Func, SpillLoc, Preg));
    443   // Add "reg=spill;FakeUse(reg)" before FillPoint
    444   Target->lowerInst(Node, FillPoint, InstAssign::create(Func, Preg, SpillLoc));
    445   Target->lowerInst(Node, FillPoint, InstFakeUse::create(Func, Preg));
    446 }
    447 
    448 void LinearScan::handleActiveRangeExpiredOrInactive(const Variable *Cur) {
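          // Walk Active from the end so that removing the current element via
          // moveItem() doesn't disturb the indices still to be visited.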
    449   for (SizeT I = Active.size(); I > 0; --I) {
    450     const SizeT Index = I - 1;
    451     Variable *Item = Active[Index];
    452     Item->trimLiveRange(Cur->getLiveRange().getStart());
    453     bool Moved = false;
    454     if (Item->rangeEndsBefore(Cur)) {
    455       // Move Item from Active to Handled list.
    456       dumpLiveRangeTrace("Expiring     ", Item);
    457       moveItem(Active, Index, Handled);
    458       Moved = true;
    459     } else if (!Item->rangeOverlapsStart(Cur)) {
    460       // Move Item from Active to Inactive list.
    461       dumpLiveRangeTrace("Inactivating ", Item);
    462       moveItem(Active, Index, Inactive);
    463       Moved = true;
    464     }
    465     if (Moved) {
    466       // Decrement Item from RegUses[].
    467       assert(Item->hasRegTmp());
    468       const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
    469       for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
    470         --RegUses[RegAlias];
    471         assert(RegUses[RegAlias] >= 0);
    472       }
    473     }
    474   }
    475 }
    476 
    477 void LinearScan::handleInactiveRangeExpiredOrReactivated(const Variable *Cur) {
    478   for (SizeT I = Inactive.size(); I > 0; --I) {
    479     const SizeT Index = I - 1;
    480     Variable *Item = Inactive[Index];
    481     Item->trimLiveRange(Cur->getLiveRange().getStart());
    482     if (Item->rangeEndsBefore(Cur)) {
    483       // Move Item from Inactive to Handled list.
    484       dumpLiveRangeTrace("Expiring     ", Item);
    485       moveItem(Inactive, Index, Handled);
    486     } else if (Item->rangeOverlapsStart(Cur)) {
    487       // Move Item from Inactive to Active list.
    488       dumpLiveRangeTrace("Reactivating ", Item);
    489       moveItem(Inactive, Index, Active);
    490       // Increment Item in RegUses[].
    491       assert(Item->hasRegTmp());
    492       const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
    493       for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
    494         assert(RegUses[RegAlias] >= 0);
    495         ++RegUses[RegAlias];
    496       }
    497     }
    498   }
    499 }
    500 
    501 // Infer register preference and allowable overlap. Only form a preference when
    502 // the current Variable has an unambiguous "first" definition. The preference is
    503 // some source Variable of the defining instruction that either is assigned a
    504 // register that is currently free, or that is assigned a register that is not
    505 // free but overlap is allowed. Overlap is allowed when the Variable under
    506 // consideration is single-definition, and its definition is a simple assignment
    507 // - i.e., the register gets copied/aliased but is never modified.  Furthermore,
    508 // overlap is only allowed when preferred Variable definition instructions do
    509 // not appear within the current Variable's live range.
    510 void LinearScan::findRegisterPreference(IterationState &Iter) {
    511   Iter.Prefer = nullptr;
    512   Iter.PreferReg = RegNumT();
    513   Iter.AllowOverlap = false;
    514 
    515   if (!FindPreference)
    516     return;
    517 
    518   VariablesMetadata *VMetadata = Func->getVMetadata();
    519   const Inst *DefInst = VMetadata->getFirstDefinitionSingleBlock(Iter.Cur);
    520   if (DefInst == nullptr)
    521     return;
    522 
    523   assert(DefInst->getDest() == Iter.Cur);
    524   const bool IsSingleDefAssign =
    525       DefInst->isVarAssign() && !VMetadata->isMultiDef(Iter.Cur);
    526   FOREACH_VAR_IN_INST(SrcVar, *DefInst) {
    527     // Only consider source variables that have (so far) been assigned a
    528     // register.
    529     if (!SrcVar->hasRegTmp())
    530       continue;
    531 
    532     // That register must be one in the RegMask set, e.g. don't try to prefer
    533     // the stack pointer as a result of the stacksave intrinsic.
    534     const auto &Aliases = *RegAliases[SrcVar->getRegNumTmp()];
    535     const int SrcReg = (Iter.RegMask & Aliases).find_first();
    536     if (SrcReg == -1)
    537       continue;
    538 
    539     if (FindOverlap && IsSingleDefAssign && !Iter.Free[SrcReg]) {
    540       // Don't bother trying to enable AllowOverlap if the register is already
    541       // free (hence the test on Iter.Free[SrcReg]).
    542       Iter.AllowOverlap = !overlapsDefs(Func, Iter.Cur, SrcVar);
    543     }
    544     if (Iter.AllowOverlap || Iter.Free[SrcReg]) {
    545       Iter.Prefer = SrcVar;
    546       Iter.PreferReg = RegNumT::fromInt(SrcReg);
    547       // Stop looking for a preference after the first valid preference is
    548       // found.  One might think that we should look at all instruction
    549       // variables to find the best <Prefer,AllowOverlap> combination, but note
    550       // that AllowOverlap can only be true for a simple assignment statement
    551       // which can have only one source operand, so it's not possible for
    552       // AllowOverlap to be true beyond the first source operand.
    553       FOREACH_VAR_IN_INST_BREAK;
    554     }
    555   }
    556   if (BuildDefs::dump() && Verbose && Iter.Prefer) {
    557     Ostream &Str = Ctx->getStrDump();
    558     Str << "Initial Iter.Prefer=";
    559     Iter.Prefer->dump(Func);
    560     Str << " R=" << Iter.PreferReg << " LIVE=" << Iter.Prefer->getLiveRange()
    561         << " Overlap=" << Iter.AllowOverlap << "\n";
    562   }
    563 }
    564 
    565 // Remove registers from the Iter.Free[] list where an Inactive range overlaps
    566 // with the current range.
    567 void LinearScan::filterFreeWithInactiveRanges(IterationState &Iter) {
    568   for (const Variable *Item : Inactive) {
    569     if (!Item->rangeOverlaps(Iter.Cur))
    570       continue;
    571     const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
    572     for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
    573       // Don't assert(Iter.Free[RegAlias]) because in theory (though probably
    574       // never in practice) there could be two inactive variables that were
    575       // marked with AllowOverlap.
    576       Iter.Free[RegAlias] = false;
    577       Iter.FreeUnfiltered[RegAlias] = false;
    578       // Disable AllowOverlap if an Inactive variable, which is not Prefer,
    579       // shares Prefer's register, and has a definition within Cur's live range.
    580       if (Iter.AllowOverlap && Item != Iter.Prefer &&
    581           RegAlias == Iter.PreferReg && overlapsDefs(Func, Iter.Cur, Item)) {
    582         Iter.AllowOverlap = false;
    583         dumpDisableOverlap(Func, Item, "Inactive");
    584       }
    585     }
    586   }
    587 }
    588 
    589 // Remove registers from the Iter.Free[] list where an Unhandled pre-colored
    590 // range overlaps with the current range, and set those registers to infinite
    591 // weight so that they aren't candidates for eviction.
    592 // Cur->rangeEndsBefore(Item) is an early exit check that turns a guaranteed
    593 // O(N^2) algorithm into expected linear complexity.
    594 void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) {
    595   // TODO(stichnot): Partition UnhandledPrecolored according to register class,
    596   // to restrict the number of overlap comparisons needed.
    597   for (Variable *Item : reverse_range(UnhandledPrecolored)) {
    598     assert(Item->hasReg());
    599     if (Iter.Cur->rangeEndsBefore(Item))
    600       break;
    601     if (!Item->rangeOverlaps(Iter.Cur))
    602       continue;
    603     const auto &Aliases =
    604         *RegAliases[Item->getRegNum()]; // Note: not getRegNumTmp()
    605     for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
    606       Iter.Weights[RegAlias].setWeight(RegWeight::Inf);
    607       Iter.Free[RegAlias] = false;
    608       Iter.FreeUnfiltered[RegAlias] = false;
    609       Iter.PrecoloredUnhandledMask[RegAlias] = true;
    610       // Disable Iter.AllowOverlap if the preferred register is one of these
    611       // pre-colored unhandled overlapping ranges.
    612       if (Iter.AllowOverlap && RegAlias == Iter.PreferReg) {
    613         Iter.AllowOverlap = false;
    614         dumpDisableOverlap(Func, Item, "PrecoloredUnhandled");
    615       }
    616     }
    617   }
    618 }
    619 
    620 void LinearScan::allocatePrecoloredRegister(Variable *Cur) {
    621   const auto RegNum = Cur->getRegNum();
    622   // RegNumTmp should have already been set above.
    623   assert(Cur->getRegNumTmp() == RegNum);
    624   dumpLiveRangeTrace("Precoloring  ", Cur);
    625   Active.push_back(Cur);
    626   const auto &Aliases = *RegAliases[RegNum];
    627   for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
    628     assert(RegUses[RegAlias] >= 0);
    629     ++RegUses[RegAlias];
    630   }
    631   assert(!UnhandledPrecolored.empty());
    632   assert(UnhandledPrecolored.back() == Cur);
    633   UnhandledPrecolored.pop_back();
    634 }
    635 
    636 void LinearScan::allocatePreferredRegister(IterationState &Iter) {
    637   Iter.Cur->setRegNumTmp(Iter.PreferReg);
    638   dumpLiveRangeTrace("Preferring   ", Iter.Cur);
    639   const auto &Aliases = *RegAliases[Iter.PreferReg];
    640   for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
    641     assert(RegUses[RegAlias] >= 0);
    642     ++RegUses[RegAlias];
    643   }
    644   Active.push_back(Iter.Cur);
    645 }
    646 
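        // Picks the first register in the free set: the normal (filtered) set when
        // Filtered is true, otherwise the unfiltered set that also includes registers
        // held back by -reg-reserve. The trace labels these cases "Allocating Y" and
        // "Allocating X" respectively.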
    647 void LinearScan::allocateFreeRegister(IterationState &Iter, bool Filtered) {
    648   const RegNumT RegNum =
    649       *RegNumBVIter(Filtered ? Iter.Free : Iter.FreeUnfiltered).begin();
    650   Iter.Cur->setRegNumTmp(RegNum);
    651   if (Filtered)
    652     dumpLiveRangeTrace("Allocating Y ", Iter.Cur);
    653   else
    654     dumpLiveRangeTrace("Allocating X ", Iter.Cur);
    655   const auto &Aliases = *RegAliases[RegNum];
    656   for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
    657     assert(RegUses[RegAlias] >= 0);
    658     ++RegUses[RegAlias];
    659   }
    660   Active.push_back(Iter.Cur);
    661 }
    662 
    663 void LinearScan::handleNoFreeRegisters(IterationState &Iter) {
    664   // Check Active ranges.
    665   for (const Variable *Item : Active) {
    666     assert(Item->rangeOverlaps(Iter.Cur));
    667     assert(Item->hasRegTmp());
    668     const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
    669     // We add the Item's weight to each alias/subregister to represent that,
    670     // should we decide to pick any of them, we would incur that many
    671     // memory accesses.
    672     RegWeight W = Item->getWeight(Func);
    673     for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
    674       Iter.Weights[RegAlias].addWeight(W);
    675     }
    676   }
    677   // Same as above, but check Inactive ranges instead of Active.
    678   for (const Variable *Item : Inactive) {
    679     if (!Item->rangeOverlaps(Iter.Cur))
    680       continue;
    681     assert(Item->hasRegTmp());
    682     const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
    683     RegWeight W = Item->getWeight(Func);
    684     for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
    685       Iter.Weights[RegAlias].addWeight(W);
    686     }
    687   }
    688 
    689   // All the weights are now calculated; find the lowest-weight register.
    690   int32_t MinWeightIndex = findMinWeightIndex(Iter.RegMask, Iter.Weights);
    691 
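          // If no candidate register is cheaper to evict than Cur's own weight,
          // eviction isn't justified: either leave Cur without a register (if it can
          // live in memory), force a register free with a spill/fill pair (phi
          // lowering), or fall back to the reserve set.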
    692   if (MinWeightIndex < 0 ||
    693       Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
    694     if (!Iter.Cur->mustHaveReg()) {
    695       // Iter.Cur doesn't have priority over any other live ranges, so don't
    696       // allocate any register to it, and move it to the Handled state.
    697       Handled.push_back(Iter.Cur);
    698       return;
    699     }
    700     if (Kind == RAK_Phi) {
    701       // Iter.Cur is infinite-weight but all physical registers are already
    702       // taken, so we need to force one to be temporarily available.
    703       addSpillFill(Iter);
    704       Handled.push_back(Iter.Cur);
    705       return;
    706     }
    707     // The remaining portion of the enclosing "if" block should only be
    708     // reachable if we are manually limiting physical registers for testing.
    709     if (UseReserve) {
    710       if (Iter.FreeUnfiltered.any()) {
    711         // There is some available physical register held in reserve, so use it.
    712         constexpr bool NotFiltered = false;
    713         allocateFreeRegister(Iter, NotFiltered);
    714         // Iter.Cur is now on the Active list.
    715         return;
    716       }
    717       // At this point, we need to find some reserve register that is already
    718       // assigned to a non-infinite-weight variable.  This could happen if some
    719       // variable was previously assigned an alias of such a register.
    720       MinWeightIndex = findMinWeightIndex(Iter.RegMaskUnfiltered, Iter.Weights);
    721     }
    722     if (Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
    723       dumpLiveRangeTrace("Failing      ", Iter.Cur);
    724       Func->setError("Unable to find a physical register for an "
    725                      "infinite-weight live range "
    726                      "(consider using -reg-reserve): " +
    727                      Iter.Cur->getName());
    728       Handled.push_back(Iter.Cur);
    729       return;
    730     }
    731     // At this point, MinWeightIndex points to a valid reserve register to
    732     // reallocate to Iter.Cur, so drop into the eviction code.
    733   }
    734 
    735   // Evict all live ranges in Active that are assigned register number
    736   // MinWeightIndex or one of its aliases.
    737   const auto &Aliases = *RegAliases[MinWeightIndex];
    738   for (SizeT I = Active.size(); I > 0; --I) {
    739     const SizeT Index = I - 1;
    740     Variable *Item = Active[Index];
    741     const auto RegNum = Item->getRegNumTmp();
    742     if (Aliases[RegNum]) {
    743       dumpLiveRangeTrace("Evicting A   ", Item);
    744       const auto &Aliases = *RegAliases[RegNum];
    745       for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
    746         --RegUses[RegAlias];
    747         assert(RegUses[RegAlias] >= 0);
    748       }
    749       Item->setRegNumTmp(RegNumT());
    750       moveItem(Active, Index, Handled);
    751       Evicted.push_back(Item);
    752     }
    753   }
    754   // Do the same for Inactive.
    755   for (SizeT I = Inactive.size(); I > 0; --I) {
    756     const SizeT Index = I - 1;
    757     Variable *Item = Inactive[Index];
    758     // Note: The Item->rangeOverlaps(Cur) clause is not part of the description
    759     // of AssignMemLoc() in the original paper. But there doesn't seem to be any
    760     // need to evict an inactive live range that doesn't overlap with the live
    761     // range currently being considered. It's especially bad if we would end up
    762     // evicting an infinite-weight but currently-inactive live range. The most
    763     // common situation for this would be a scratch register kill set for call
    764     // instructions.
    765     if (Aliases[Item->getRegNumTmp()] && Item->rangeOverlaps(Iter.Cur)) {
    766       dumpLiveRangeTrace("Evicting I   ", Item);
    767       Item->setRegNumTmp(RegNumT());
    768       moveItem(Inactive, Index, Handled);
    769       Evicted.push_back(Item);
    770     }
    771   }
    772   // Assign the register to Cur.
    773   Iter.Cur->setRegNumTmp(RegNumT::fromInt(MinWeightIndex));
    774   for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
    775     assert(RegUses[RegAlias] >= 0);
    776     ++RegUses[RegAlias];
    777   }
    778   Active.push_back(Iter.Cur);
    779   dumpLiveRangeTrace("Allocating Z ", Iter.Cur);
    780 }
    781 
    782 void LinearScan::assignFinalRegisters(const SmallBitVector &RegMaskFull,
    783                                       const SmallBitVector &PreDefinedRegisters,
    784                                       bool Randomized) {
    785   const size_t NumRegisters = RegMaskFull.size();
    786   llvm::SmallVector<RegNumT, REGS_SIZE> Permutation(NumRegisters);
    787   if (Randomized) {
    788     // Create a random number generator for regalloc randomization. Merge the
    789     // function's sequence number and the Kind value to form the Salt. Because
    790     // regAlloc() is called twice under O2, the second time with RAK_Phi, the
    791     // Kind == RAK_Phi check determines the lowest-order bit so that the Salt
    792     // differs between the two runs.
    793     uint64_t Salt =
    794         (Func->getSequenceNumber() << 1) ^ (Kind == RAK_Phi ? 0u : 1u);
    795     Target->makeRandomRegisterPermutation(
    796         Permutation, PreDefinedRegisters | ~RegMaskFull, Salt);
    797   }
    798 
    799   // Finish up by setting RegNum = RegNumTmp (or a random permutation thereof)
    800   // for each Variable.
    801   for (Variable *Item : Handled) {
    802     const auto RegNum = Item->getRegNumTmp();
    803     auto AssignedRegNum = RegNum;
    804 
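            // Only registers chosen by the allocator are permuted; pre-colored
            // variables (hasReg()) keep their fixed assignment.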
    805     if (Randomized && Item->hasRegTmp() && !Item->hasReg()) {
    806       AssignedRegNum = Permutation[RegNum];
    807     }
    808     if (BuildDefs::dump() && Verbose) {
    809       Ostream &Str = Ctx->getStrDump();
    810       if (!Item->hasRegTmp()) {
    811         Str << "Not assigning ";
    812         Item->dump(Func);
    813         Str << "\n";
    814       } else {
    815         Str << (AssignedRegNum == Item->getRegNum() ? "Reassigning "
    816                                                     : "Assigning ")
    817             << Target->getRegName(AssignedRegNum, Item->getType()) << "(r"
    818             << AssignedRegNum << ") to ";
    819         Item->dump(Func);
    820         Str << "\n";
    821       }
    822     }
    823     Item->setRegNum(AssignedRegNum);
    824   }
    825 }
    826 
    827 // Implements the linear-scan algorithm. Based on "Linear Scan Register
    828 // Allocation in the Context of SSA Form and Register Constraints" by Hanspeter
    829 // Mössenböck and Michael Pfeiffer,
    830 // ftp://ftp.ssw.uni-linz.ac.at/pub/Papers/Moe02.PDF. This implementation is
    831 // modified to take affinity into account and allow two interfering variables to
    832 // share the same register in certain cases.
    833 //
    834 // Requires running Cfg::liveness(Liveness_Intervals) in preparation. Results
    835 // are assigned to Variable::RegNum for each Variable.
    836 void LinearScan::scan(const SmallBitVector &RegMaskFull, bool Randomized) {
    837   TimerMarker T(TimerStack::TT_linearScan, Func);
    838   assert(RegMaskFull.any()); // Sanity check
    839   if (Verbose)
    840     Ctx->lockStr();
    841   Func->resetCurrentNode();
    842   const size_t NumRegisters = RegMaskFull.size();
    843   SmallBitVector PreDefinedRegisters(NumRegisters);
    844   if (Randomized) {
    845     for (Variable *Var : UnhandledPrecolored) {
    846       PreDefinedRegisters[Var->getRegNum()] = true;
    847     }
    848   }
    849 
    850   // Build a LiveRange representing the Kills list.
    851   LiveRange KillsRange(Kills);
    852   KillsRange.untrim();
    853 
    854   // Reset the register use count.
    855   RegUses.resize(NumRegisters);
    856   std::fill(RegUses.begin(), RegUses.end(), 0);
    857 
    858   // Unhandled is sorted so that ranges are popped in increasing order of start points.
    859   assert(Active.empty());
    860   assert(Inactive.empty());
    861   assert(Handled.empty());
    862   const TargetLowering::RegSetMask RegsInclude =
    863       TargetLowering::RegSet_CallerSave;
    864   const TargetLowering::RegSetMask RegsExclude = TargetLowering::RegSet_None;
    865   const SmallBitVector KillsMask =
    866       Target->getRegisterSet(RegsInclude, RegsExclude);
    867 
    868   // Allocate memory once outside the loop.
    869   IterationState Iter;
    870   Iter.Weights.reserve(NumRegisters);
    871   Iter.PrecoloredUnhandledMask.reserve(NumRegisters);
    872 
    873   while (!Unhandled.empty()) {
    874     Iter.Cur = Unhandled.back();
    875     Unhandled.pop_back();
    876     dumpLiveRangeTrace("\nConsidering  ", Iter.Cur);
    877     if (UseReserve)
    878       assert(Target->getAllRegistersForVariable(Iter.Cur).any());
    879     else
    880       assert(Target->getRegistersForVariable(Iter.Cur).any());
    881     Iter.RegMask = RegMaskFull & Target->getRegistersForVariable(Iter.Cur);
    882     Iter.RegMaskUnfiltered =
    883         RegMaskFull & Target->getAllRegistersForVariable(Iter.Cur);
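            // RegMask holds the registers normally available to Cur's register
            // class; RegMaskUnfiltered additionally includes any registers held
            // back by -reg-reserve.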
    884     KillsRange.trim(Iter.Cur->getLiveRange().getStart());
    885 
    886     // Check for pre-colored ranges. If Cur is pre-colored, it definitely gets
    887     // that register. Previously processed live ranges would have avoided that
    888     // register due to it being pre-colored. Future processed live ranges won't
    889     // evict that register because the live range has infinite weight.
    890     if (Iter.Cur->hasReg()) {
    891       allocatePrecoloredRegister(Iter.Cur);
    892       continue;
    893     }
    894 
    895     handleActiveRangeExpiredOrInactive(Iter.Cur);
    896     handleInactiveRangeExpiredOrReactivated(Iter.Cur);
    897 
    898     // Calculate available registers into Iter.Free[] and Iter.FreeUnfiltered[].
    899     Iter.Free = Iter.RegMask;
    900     Iter.FreeUnfiltered = Iter.RegMaskUnfiltered;
    901     for (SizeT i = 0; i < Iter.RegMask.size(); ++i) {
    902       if (RegUses[i] > 0) {
    903         Iter.Free[i] = false;
    904         Iter.FreeUnfiltered[i] = false;
    905       }
    906     }
    907 
    908     findRegisterPreference(Iter);
    909     filterFreeWithInactiveRanges(Iter);
    910 
    911     // Disable AllowOverlap if an Active variable, which is not Prefer, shares
    912     // Prefer's register, and has a definition within Cur's live range.
    913     if (Iter.AllowOverlap) {
    914       const auto &Aliases = *RegAliases[Iter.PreferReg];
    915       for (const Variable *Item : Active) {
    916         const RegNumT RegNum = Item->getRegNumTmp();
    917         if (Item != Iter.Prefer && Aliases[RegNum] &&
    918             overlapsDefs(Func, Iter.Cur, Item)) {
    919           Iter.AllowOverlap = false;
    920           dumpDisableOverlap(Func, Item, "Active");
    921         }
    922       }
    923     }
    924 
    925     Iter.Weights.resize(Iter.RegMask.size());
    926     std::fill(Iter.Weights.begin(), Iter.Weights.end(), RegWeight());
    927 
    928     Iter.PrecoloredUnhandledMask.resize(Iter.RegMask.size());
    929     Iter.PrecoloredUnhandledMask.reset();
    930 
    931     filterFreeWithPrecoloredRanges(Iter);
    932 
    933     // Remove scratch registers from the Iter.Free[] list, and mark their
    934     // Iter.Weights[] as infinite, if KillsRange overlaps Cur's live range.
    935     constexpr bool UseTrimmed = true;
    936     if (Iter.Cur->getLiveRange().overlaps(KillsRange, UseTrimmed)) {
    937       Iter.Free.reset(KillsMask);
    938       Iter.FreeUnfiltered.reset(KillsMask);
    939       for (RegNumT i : RegNumBVIter(KillsMask)) {
    940         Iter.Weights[i].setWeight(RegWeight::Inf);
    941         if (Iter.PreferReg == i)
    942           Iter.AllowOverlap = false;
    943       }
    944     }
    945 
    946     // Print info about physical register availability.
    947     if (BuildDefs::dump() && Verbose) {
    948       Ostream &Str = Ctx->getStrDump();
    949       for (RegNumT i : RegNumBVIter(Iter.RegMaskUnfiltered)) {
    950         Str << Target->getRegName(i, Iter.Cur->getType()) << "(U=" << RegUses[i]
    951             << ",F=" << Iter.Free[i] << ",P=" << Iter.PrecoloredUnhandledMask[i]
    952             << ") ";
    953       }
    954       Str << "\n";
    955     }
    956 
    957     if (Iter.Prefer && (Iter.AllowOverlap || Iter.Free[Iter.PreferReg])) {
    958       // First choice: a preferred register that is either free or is allowed to
    959       // overlap with its linked variable.
    960       allocatePreferredRegister(Iter);
    961     } else if (Iter.Free.any()) {
    962       // Second choice: any free register.
    963       constexpr bool Filtered = true;
    964       allocateFreeRegister(Iter, Filtered);
    965     } else {
    966       // Fallback: there are no free registers, so we look for the lowest-weight
    967       // register and see if Cur has higher weight.
    968       handleNoFreeRegisters(Iter);
    969     }
    970     dump(Func);
    971   }
    972 
    973   // Move anything Active or Inactive to Handled for easier handling.
    974   Handled.insert(Handled.end(), Active.begin(), Active.end());
    975   Active.clear();
    976   Handled.insert(Handled.end(), Inactive.begin(), Inactive.end());
    977   Inactive.clear();
    978   dump(Func);
    979 
    980   assignFinalRegisters(RegMaskFull, PreDefinedRegisters, Randomized);
    981 
    982   if (Verbose)
    983     Ctx->unlockStr();
    984 }
    985 
    986 // ======================== Dump routines ======================== //
    987 
    988 void LinearScan::dumpLiveRangeTrace(const char *Label, const Variable *Item) {
    989   if (!BuildDefs::dump())
    990     return;
    991 
    992   if (Verbose) {
    993     Ostream &Str = Ctx->getStrDump();
    994     Str << Label;
    995     dumpLiveRange(Item, Func);
    996     Str << "\n";
    997   }
    998 }
    999 
   1000 void LinearScan::dump(Cfg *Func) const {
   1001   if (!BuildDefs::dump())
   1002     return;
   1003   if (!Verbose)
   1004     return;
   1005   Ostream &Str = Func->getContext()->getStrDump();
   1006   Func->resetCurrentNode();
   1007   Str << "**** Current regalloc state:\n";
   1008   Str << "++++++ Handled:\n";
   1009   for (const Variable *Item : Handled) {
   1010     dumpLiveRange(Item, Func);
   1011     Str << "\n";
   1012   }
   1013   Str << "++++++ Unhandled:\n";
   1014   for (const Variable *Item : reverse_range(Unhandled)) {
   1015     dumpLiveRange(Item, Func);
   1016     Str << "\n";
   1017   }
   1018   Str << "++++++ Active:\n";
   1019   for (const Variable *Item : Active) {
   1020     dumpLiveRange(Item, Func);
   1021     Str << "\n";
   1022   }
   1023   Str << "++++++ Inactive:\n";
   1024   for (const Variable *Item : Inactive) {
   1025     dumpLiveRange(Item, Func);
   1026     Str << "\n";
   1027   }
   1028 }
   1029 
   1030 } // end of namespace Ice
   1031