Home | History | Annotate | Download | only in CodeGen
      1 //===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
     11 // register allocator for LLVM. This allocator works by constructing a PBQP
     12 // problem representing the register allocation problem under consideration,
     13 // solving this using a PBQP solver, and mapping the solution back to a
     14 // register assignment. If any variables are selected for spilling then spill
     15 // code is inserted and the process repeated.
     16 //
     17 // The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
     18 // for register allocation. For more information on PBQP for register
     19 // allocation, see the following papers:
     20 //
     21 //   (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
     22 //   PBQP. In Proceedings of the 7th Joint Modular Languages Conference
     23 //   (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
     24 //
     25 //   (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
     26 //   architectures. In Proceedings of the Joint Conference on Languages,
     27 //   Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
     28 //   NY, USA, 139-148.
     29 //
     30 //===----------------------------------------------------------------------===//
     31 
     32 #include "llvm/CodeGen/RegAllocPBQP.h"
     33 #include "RegisterCoalescer.h"
     34 #include "Spiller.h"
     35 #include "llvm/Analysis/AliasAnalysis.h"
     36 #include "llvm/CodeGen/CalcSpillWeights.h"
     37 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
     38 #include "llvm/CodeGen/LiveRangeEdit.h"
     39 #include "llvm/CodeGen/LiveStackAnalysis.h"
     40 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
     41 #include "llvm/CodeGen/MachineDominators.h"
     42 #include "llvm/CodeGen/MachineFunctionPass.h"
     43 #include "llvm/CodeGen/MachineLoopInfo.h"
     44 #include "llvm/CodeGen/MachineRegisterInfo.h"
     45 #include "llvm/CodeGen/RegAllocRegistry.h"
     46 #include "llvm/CodeGen/VirtRegMap.h"
     47 #include "llvm/IR/Module.h"
     48 #include "llvm/Support/Debug.h"
     49 #include "llvm/Support/FileSystem.h"
     50 #include "llvm/Support/Printable.h"
     51 #include "llvm/Support/raw_ostream.h"
     52 #include "llvm/Target/TargetInstrInfo.h"
     53 #include "llvm/Target/TargetSubtargetInfo.h"
     54 #include <limits>
     55 #include <memory>
     56 #include <queue>
     57 #include <set>
     58 #include <sstream>
     59 #include <vector>
     60 
     61 using namespace llvm;
     62 
     63 #define DEBUG_TYPE "regalloc"
     64 
     65 static RegisterRegAlloc
     66 RegisterPBQPRepAlloc("pbqp", "PBQP register allocator",
     67                        createDefaultPBQPRegisterAllocator);
     68 
     69 static cl::opt<bool>
     70 PBQPCoalescing("pbqp-coalescing",
     71                 cl::desc("Attempt coalescing during PBQP register allocation."),
     72                 cl::init(false), cl::Hidden);
     73 
     74 #ifndef NDEBUG
     75 static cl::opt<bool>
     76 PBQPDumpGraphs("pbqp-dump-graphs",
     77                cl::desc("Dump graphs for each function/round in the compilation unit."),
     78                cl::init(false), cl::Hidden);
     79 #endif
     80 
     81 namespace {
     82 
     83 ///
     84 /// PBQP based allocators solve the register allocation problem by mapping
     85 /// register allocation problems to Partitioned Boolean Quadratic
     86 /// Programming problems.
     87 class RegAllocPBQP : public MachineFunctionPass {
     88 public:
     89 
     90   static char ID;
     91 
     92   /// Construct a PBQP register allocator.
     93   RegAllocPBQP(char *cPassID = nullptr)
     94       : MachineFunctionPass(ID), customPassID(cPassID) {
     95     initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
     96     initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
     97     initializeLiveStacksPass(*PassRegistry::getPassRegistry());
     98     initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
     99   }
    100 
    101   /// Return the pass name.
    102   const char* getPassName() const override {
    103     return "PBQP Register Allocator";
    104   }
    105 
    106   /// PBQP analysis usage.
    107   void getAnalysisUsage(AnalysisUsage &au) const override;
    108 
    109   /// Perform register allocation
    110   bool runOnMachineFunction(MachineFunction &MF) override;
    111 
    112 private:
    113 
    114   typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
    115   typedef std::vector<const LiveInterval*> Node2LIMap;
    116   typedef std::vector<unsigned> AllowedSet;
    117   typedef std::vector<AllowedSet> AllowedSetMap;
    118   typedef std::pair<unsigned, unsigned> RegPair;
    119   typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
    120   typedef std::set<unsigned> RegSet;
    121 
    122   char *customPassID;
    123 
    124   RegSet VRegsToAlloc, EmptyIntervalVRegs;
    125 
    126   /// \brief Finds the initial set of vreg intervals to allocate.
    127   void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS);
    128 
    129   /// \brief Constructs an initial graph.
    130   void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller);
    131 
    132   /// \brief Spill the given VReg.
    133   void spillVReg(unsigned VReg, SmallVectorImpl<unsigned> &NewIntervals,
    134                  MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM,
    135                  Spiller &VRegSpiller);
    136 
    137   /// \brief Given a solved PBQP problem maps this solution back to a register
    138   /// assignment.
    139   bool mapPBQPToRegAlloc(const PBQPRAGraph &G,
    140                          const PBQP::Solution &Solution,
    141                          VirtRegMap &VRM,
    142                          Spiller &VRegSpiller);
    143 
    144   /// \brief Postprocessing before final spilling. Sets basic block "live in"
    145   /// variables.
    146   void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS,
    147                      VirtRegMap &VRM) const;
    148 
    149 };
    150 
    151 char RegAllocPBQP::ID = 0;
    152 
    153 /// @brief Set spill costs for each node in the PBQP reg-alloc graph.
    154 class SpillCosts : public PBQPRAConstraint {
    155 public:
    156   void apply(PBQPRAGraph &G) override {
    157     LiveIntervals &LIS = G.getMetadata().LIS;
    158 
    159     // A minimum spill costs, so that register constraints can can be set
    160     // without normalization in the [0.0:MinSpillCost( interval.
    161     const PBQP::PBQPNum MinSpillCost = 10.0;
    162 
    163     for (auto NId : G.nodeIds()) {
    164       PBQP::PBQPNum SpillCost =
    165         LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight;
    166       if (SpillCost == 0.0)
    167         SpillCost = std::numeric_limits<PBQP::PBQPNum>::min();
    168       else
    169         SpillCost += MinSpillCost;
    170       PBQPRAGraph::RawVector NodeCosts(G.getNodeCosts(NId));
    171       NodeCosts[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost;
    172       G.setNodeCosts(NId, std::move(NodeCosts));
    173     }
    174   }
    175 };
    176 
    177 /// @brief Add interference edges between overlapping vregs.
    178 class Interference : public PBQPRAConstraint {
    179 private:
    180 
    181   typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr;
    182   typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IKey;
    183   typedef DenseMap<IKey, PBQPRAGraph::MatrixPtr> IMatrixCache;
    184   typedef DenseSet<IKey> DisjointAllowedRegsCache;
    185   typedef std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId> IEdgeKey;
    186   typedef DenseSet<IEdgeKey> IEdgeCache;
    187 
    188   bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
    189                                PBQPRAGraph::NodeId MId,
    190                                const DisjointAllowedRegsCache &D) const {
    191     const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
    192     const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
    193 
    194     if (NRegs == MRegs)
    195       return false;
    196 
    197     if (NRegs < MRegs)
    198       return D.count(IKey(NRegs, MRegs)) > 0;
    199 
    200     return D.count(IKey(MRegs, NRegs)) > 0;
    201   }
    202 
    203   void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
    204                               PBQPRAGraph::NodeId MId,
    205                               DisjointAllowedRegsCache &D) {
    206     const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
    207     const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
    208 
    209     assert(NRegs != MRegs && "AllowedRegs can not be disjoint with itself");
    210 
    211     if (NRegs < MRegs)
    212       D.insert(IKey(NRegs, MRegs));
    213     else
    214       D.insert(IKey(MRegs, NRegs));
    215   }
    216 
    217   // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
    218   // for the fast interference graph construction algorithm. The last is there
    219   // to save us from looking up node ids via the VRegToNode map in the graph
    220   // metadata.
    221   typedef std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId>
    222     IntervalInfo;
    223 
    224   static SlotIndex getStartPoint(const IntervalInfo &I) {
    225     return std::get<0>(I)->segments[std::get<1>(I)].start;
    226   }
    227 
    228   static SlotIndex getEndPoint(const IntervalInfo &I) {
    229     return std::get<0>(I)->segments[std::get<1>(I)].end;
    230   }
    231 
    232   static PBQP::GraphBase::NodeId getNodeId(const IntervalInfo &I) {
    233     return std::get<2>(I);
    234   }
    235 
    236   static bool lowestStartPoint(const IntervalInfo &I1,
    237                                const IntervalInfo &I2) {
    238     // Condition reversed because priority queue has the *highest* element at
    239     // the front, rather than the lowest.
    240     return getStartPoint(I1) > getStartPoint(I2);
    241   }
    242 
    243   static bool lowestEndPoint(const IntervalInfo &I1,
    244                              const IntervalInfo &I2) {
    245     SlotIndex E1 = getEndPoint(I1);
    246     SlotIndex E2 = getEndPoint(I2);
    247 
    248     if (E1 < E2)
    249       return true;
    250 
    251     if (E1 > E2)
    252       return false;
    253 
    254     // If two intervals end at the same point, we need a way to break the tie or
    255     // the set will assume they're actually equal and refuse to insert a
    256     // "duplicate". Just compare the vregs - fast and guaranteed unique.
    257     return std::get<0>(I1)->reg < std::get<0>(I2)->reg;
    258   }
    259 
    260   static bool isAtLastSegment(const IntervalInfo &I) {
    261     return std::get<1>(I) == std::get<0>(I)->size() - 1;
    262   }
    263 
    264   static IntervalInfo nextSegment(const IntervalInfo &I) {
    265     return std::make_tuple(std::get<0>(I), std::get<1>(I) + 1, std::get<2>(I));
    266   }
    267 
    268 public:
    269 
    270   void apply(PBQPRAGraph &G) override {
    271     // The following is loosely based on the linear scan algorithm introduced in
    272     // "Linear Scan Register Allocation" by Poletto and Sarkar. This version
    273     // isn't linear, because the size of the active set isn't bound by the
    274     // number of registers, but rather the size of the largest clique in the
    275     // graph. Still, we expect this to be better than N^2.
    276     LiveIntervals &LIS = G.getMetadata().LIS;
    277 
    278     // Interferenc matrices are incredibly regular - they're only a function of
    279     // the allowed sets, so we cache them to avoid the overhead of constructing
    280     // and uniquing them.
    281     IMatrixCache C;
    282 
    283     // Finding an edge is expensive in the worst case (O(max_clique(G))). So
    284     // cache locally edges we have already seen.
    285     IEdgeCache EC;
    286 
    287     // Cache known disjoint allowed registers pairs
    288     DisjointAllowedRegsCache D;
    289 
    290     typedef std::set<IntervalInfo, decltype(&lowestEndPoint)> IntervalSet;
    291     typedef std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
    292                                 decltype(&lowestStartPoint)> IntervalQueue;
    293     IntervalSet Active(lowestEndPoint);
    294     IntervalQueue Inactive(lowestStartPoint);
    295 
    296     // Start by building the inactive set.
    297     for (auto NId : G.nodeIds()) {
    298       unsigned VReg = G.getNodeMetadata(NId).getVReg();
    299       LiveInterval &LI = LIS.getInterval(VReg);
    300       assert(!LI.empty() && "PBQP graph contains node for empty interval");
    301       Inactive.push(std::make_tuple(&LI, 0, NId));
    302     }
    303 
    304     while (!Inactive.empty()) {
    305       // Tentatively grab the "next" interval - this choice may be overriden
    306       // below.
    307       IntervalInfo Cur = Inactive.top();
    308 
    309       // Retire any active intervals that end before Cur starts.
    310       IntervalSet::iterator RetireItr = Active.begin();
    311       while (RetireItr != Active.end() &&
    312              (getEndPoint(*RetireItr) <= getStartPoint(Cur))) {
    313         // If this interval has subsequent segments, add the next one to the
    314         // inactive list.
    315         if (!isAtLastSegment(*RetireItr))
    316           Inactive.push(nextSegment(*RetireItr));
    317 
    318         ++RetireItr;
    319       }
    320       Active.erase(Active.begin(), RetireItr);
    321 
    322       // One of the newly retired segments may actually start before the
    323       // Cur segment, so re-grab the front of the inactive list.
    324       Cur = Inactive.top();
    325       Inactive.pop();
    326 
    327       // At this point we know that Cur overlaps all active intervals. Add the
    328       // interference edges.
    329       PBQP::GraphBase::NodeId NId = getNodeId(Cur);
    330       for (const auto &A : Active) {
    331         PBQP::GraphBase::NodeId MId = getNodeId(A);
    332 
    333         // Do not add an edge when the nodes' allowed registers do not
    334         // intersect: there is obviously no interference.
    335         if (haveDisjointAllowedRegs(G, NId, MId, D))
    336           continue;
    337 
    338         // Check that we haven't already added this edge
    339         IEdgeKey EK(std::min(NId, MId), std::max(NId, MId));
    340         if (EC.count(EK))
    341           continue;
    342 
    343         // This is a new edge - add it to the graph.
    344         if (!createInterferenceEdge(G, NId, MId, C))
    345           setDisjointAllowedRegs(G, NId, MId, D);
    346         else
    347           EC.insert(EK);
    348       }
    349 
    350       // Finally, add Cur to the Active set.
    351       Active.insert(Cur);
    352     }
    353   }
    354 
    355 private:
    356 
    357   // Create an Interference edge and add it to the graph, unless it is
    358   // a null matrix, meaning the nodes' allowed registers do not have any
    359   // interference. This case occurs frequently between integer and floating
    360   // point registers for example.
    361   // return true iff both nodes interferes.
    362   bool createInterferenceEdge(PBQPRAGraph &G,
    363                               PBQPRAGraph::NodeId NId, PBQPRAGraph::NodeId MId,
    364                               IMatrixCache &C) {
    365 
    366     const TargetRegisterInfo &TRI =
    367         *G.getMetadata().MF.getSubtarget().getRegisterInfo();
    368     const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
    369     const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs();
    370 
    371     // Try looking the edge costs up in the IMatrixCache first.
    372     IKey K(&NRegs, &MRegs);
    373     IMatrixCache::iterator I = C.find(K);
    374     if (I != C.end()) {
    375       G.addEdgeBypassingCostAllocator(NId, MId, I->second);
    376       return true;
    377     }
    378 
    379     PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0);
    380     bool NodesInterfere = false;
    381     for (unsigned I = 0; I != NRegs.size(); ++I) {
    382       unsigned PRegN = NRegs[I];
    383       for (unsigned J = 0; J != MRegs.size(); ++J) {
    384         unsigned PRegM = MRegs[J];
    385         if (TRI.regsOverlap(PRegN, PRegM)) {
    386           M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
    387           NodesInterfere = true;
    388         }
    389       }
    390     }
    391 
    392     if (!NodesInterfere)
    393       return false;
    394 
    395     PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M));
    396     C[K] = G.getEdgeCostsPtr(EId);
    397 
    398     return true;
    399   }
    400 };
    401 
    402 
    403 class Coalescing : public PBQPRAConstraint {
    404 public:
    405   void apply(PBQPRAGraph &G) override {
    406     MachineFunction &MF = G.getMetadata().MF;
    407     MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI;
    408     CoalescerPair CP(*MF.getSubtarget().getRegisterInfo());
    409 
    410     // Scan the machine function and add a coalescing cost whenever CoalescerPair
    411     // gives the Ok.
    412     for (const auto &MBB : MF) {
    413       for (const auto &MI : MBB) {
    414 
    415         // Skip not-coalescable or already coalesced copies.
    416         if (!CP.setRegisters(&MI) || CP.getSrcReg() == CP.getDstReg())
    417           continue;
    418 
    419         unsigned DstReg = CP.getDstReg();
    420         unsigned SrcReg = CP.getSrcReg();
    421 
    422         const float Scale = 1.0f / MBFI.getEntryFreq();
    423         PBQP::PBQPNum CBenefit = MBFI.getBlockFreq(&MBB).getFrequency() * Scale;
    424 
    425         if (CP.isPhys()) {
    426           if (!MF.getRegInfo().isAllocatable(DstReg))
    427             continue;
    428 
    429           PBQPRAGraph::NodeId NId = G.getMetadata().getNodeIdForVReg(SrcReg);
    430 
    431           const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed =
    432             G.getNodeMetadata(NId).getAllowedRegs();
    433 
    434           unsigned PRegOpt = 0;
    435           while (PRegOpt < Allowed.size() && Allowed[PRegOpt] != DstReg)
    436             ++PRegOpt;
    437 
    438           if (PRegOpt < Allowed.size()) {
    439             PBQPRAGraph::RawVector NewCosts(G.getNodeCosts(NId));
    440             NewCosts[PRegOpt + 1] -= CBenefit;
    441             G.setNodeCosts(NId, std::move(NewCosts));
    442           }
    443         } else {
    444           PBQPRAGraph::NodeId N1Id = G.getMetadata().getNodeIdForVReg(DstReg);
    445           PBQPRAGraph::NodeId N2Id = G.getMetadata().getNodeIdForVReg(SrcReg);
    446           const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed1 =
    447             &G.getNodeMetadata(N1Id).getAllowedRegs();
    448           const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed2 =
    449             &G.getNodeMetadata(N2Id).getAllowedRegs();
    450 
    451           PBQPRAGraph::EdgeId EId = G.findEdge(N1Id, N2Id);
    452           if (EId == G.invalidEdgeId()) {
    453             PBQPRAGraph::RawMatrix Costs(Allowed1->size() + 1,
    454                                          Allowed2->size() + 1, 0);
    455             addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
    456             G.addEdge(N1Id, N2Id, std::move(Costs));
    457           } else {
    458             if (G.getEdgeNode1Id(EId) == N2Id) {
    459               std::swap(N1Id, N2Id);
    460               std::swap(Allowed1, Allowed2);
    461             }
    462             PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId));
    463             addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
    464             G.updateEdgeCosts(EId, std::move(Costs));
    465           }
    466         }
    467       }
    468     }
    469   }
    470 
    471 private:
    472 
    473   void addVirtRegCoalesce(
    474                     PBQPRAGraph::RawMatrix &CostMat,
    475                     const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1,
    476                     const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed2,
    477                     PBQP::PBQPNum Benefit) {
    478     assert(CostMat.getRows() == Allowed1.size() + 1 && "Size mismatch.");
    479     assert(CostMat.getCols() == Allowed2.size() + 1 && "Size mismatch.");
    480     for (unsigned I = 0; I != Allowed1.size(); ++I) {
    481       unsigned PReg1 = Allowed1[I];
    482       for (unsigned J = 0; J != Allowed2.size(); ++J) {
    483         unsigned PReg2 = Allowed2[J];
    484         if (PReg1 == PReg2)
    485           CostMat[I + 1][J + 1] -= Benefit;
    486       }
    487     }
    488   }
    489 
    490 };
    491 
    492 } // End anonymous namespace.
    493 
    494 // Out-of-line destructor/anchor for PBQPRAConstraint.
    495 PBQPRAConstraint::~PBQPRAConstraint() {}
    496 void PBQPRAConstraint::anchor() {}
    497 void PBQPRAConstraintList::anchor() {}
    498 
    499 void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
    500   au.setPreservesCFG();
    501   au.addRequired<AAResultsWrapperPass>();
    502   au.addPreserved<AAResultsWrapperPass>();
    503   au.addRequired<SlotIndexes>();
    504   au.addPreserved<SlotIndexes>();
    505   au.addRequired<LiveIntervals>();
    506   au.addPreserved<LiveIntervals>();
    507   //au.addRequiredID(SplitCriticalEdgesID);
    508   if (customPassID)
    509     au.addRequiredID(*customPassID);
    510   au.addRequired<LiveStacks>();
    511   au.addPreserved<LiveStacks>();
    512   au.addRequired<MachineBlockFrequencyInfo>();
    513   au.addPreserved<MachineBlockFrequencyInfo>();
    514   au.addRequired<MachineLoopInfo>();
    515   au.addPreserved<MachineLoopInfo>();
    516   au.addRequired<MachineDominatorTree>();
    517   au.addPreserved<MachineDominatorTree>();
    518   au.addRequired<VirtRegMap>();
    519   au.addPreserved<VirtRegMap>();
    520   MachineFunctionPass::getAnalysisUsage(au);
    521 }
    522 
    523 void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF,
    524                                             LiveIntervals &LIS) {
    525   const MachineRegisterInfo &MRI = MF.getRegInfo();
    526 
    527   // Iterate over all live ranges.
    528   for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    529     unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
    530     if (MRI.reg_nodbg_empty(Reg))
    531       continue;
    532     LiveInterval &LI = LIS.getInterval(Reg);
    533 
    534     // If this live interval is non-empty we will use pbqp to allocate it.
    535     // Empty intervals we allocate in a simple post-processing stage in
    536     // finalizeAlloc.
    537     if (!LI.empty()) {
    538       VRegsToAlloc.insert(LI.reg);
    539     } else {
    540       EmptyIntervalVRegs.insert(LI.reg);
    541     }
    542   }
    543 }
    544 
    545 static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI,
    546                                    const MachineFunction &MF) {
    547   const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF);
    548   for (unsigned i = 0; CSR[i] != 0; ++i)
    549     if (TRI.regsOverlap(reg, CSR[i]))
    550       return true;
    551   return false;
    552 }
    553 
    554 void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
    555                                    Spiller &VRegSpiller) {
    556   MachineFunction &MF = G.getMetadata().MF;
    557 
    558   LiveIntervals &LIS = G.getMetadata().LIS;
    559   const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
    560   const TargetRegisterInfo &TRI =
    561       *G.getMetadata().MF.getSubtarget().getRegisterInfo();
    562 
    563   std::vector<unsigned> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end());
    564 
    565   while (!Worklist.empty()) {
    566     unsigned VReg = Worklist.back();
    567     Worklist.pop_back();
    568 
    569     const TargetRegisterClass *TRC = MRI.getRegClass(VReg);
    570     LiveInterval &VRegLI = LIS.getInterval(VReg);
    571 
    572     // Record any overlaps with regmask operands.
    573     BitVector RegMaskOverlaps;
    574     LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps);
    575 
    576     // Compute an initial allowed set for the current vreg.
    577     std::vector<unsigned> VRegAllowed;
    578     ArrayRef<MCPhysReg> RawPRegOrder = TRC->getRawAllocationOrder(MF);
    579     for (unsigned I = 0; I != RawPRegOrder.size(); ++I) {
    580       unsigned PReg = RawPRegOrder[I];
    581       if (MRI.isReserved(PReg))
    582         continue;
    583 
    584       // vregLI crosses a regmask operand that clobbers preg.
    585       if (!RegMaskOverlaps.empty() && !RegMaskOverlaps.test(PReg))
    586         continue;
    587 
    588       // vregLI overlaps fixed regunit interference.
    589       bool Interference = false;
    590       for (MCRegUnitIterator Units(PReg, &TRI); Units.isValid(); ++Units) {
    591         if (VRegLI.overlaps(LIS.getRegUnit(*Units))) {
    592           Interference = true;
    593           break;
    594         }
    595       }
    596       if (Interference)
    597         continue;
    598 
    599       // preg is usable for this virtual register.
    600       VRegAllowed.push_back(PReg);
    601     }
    602 
    603     // Check for vregs that have no allowed registers. These should be
    604     // pre-spilled and the new vregs added to the worklist.
    605     if (VRegAllowed.empty()) {
    606       SmallVector<unsigned, 8> NewVRegs;
    607       spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
    608       Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end());
    609       continue;
    610     }
    611 
    612     PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0);
    613 
    614     // Tweak cost of callee saved registers, as using then force spilling and
    615     // restoring them. This would only happen in the prologue / epilogue though.
    616     for (unsigned i = 0; i != VRegAllowed.size(); ++i)
    617       if (isACalleeSavedRegister(VRegAllowed[i], TRI, MF))
    618         NodeCosts[1 + i] += 1.0;
    619 
    620     PBQPRAGraph::NodeId NId = G.addNode(std::move(NodeCosts));
    621     G.getNodeMetadata(NId).setVReg(VReg);
    622     G.getNodeMetadata(NId).setAllowedRegs(
    623       G.getMetadata().getAllowedRegs(std::move(VRegAllowed)));
    624     G.getMetadata().setNodeIdForVReg(VReg, NId);
    625   }
    626 }
    627 
    628 void RegAllocPBQP::spillVReg(unsigned VReg,
    629                              SmallVectorImpl<unsigned> &NewIntervals,
    630                              MachineFunction &MF, LiveIntervals &LIS,
    631                              VirtRegMap &VRM, Spiller &VRegSpiller) {
    632 
    633   VRegsToAlloc.erase(VReg);
    634   LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM);
    635   VRegSpiller.spill(LRE);
    636 
    637   const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
    638   (void)TRI;
    639   DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: "
    640                << LRE.getParent().weight << ", New vregs: ");
    641 
    642   // Copy any newly inserted live intervals into the list of regs to
    643   // allocate.
    644   for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end();
    645        I != E; ++I) {
    646     const LiveInterval &LI = LIS.getInterval(*I);
    647     assert(!LI.empty() && "Empty spill range.");
    648     DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " ");
    649     VRegsToAlloc.insert(LI.reg);
    650   }
    651 
    652   DEBUG(dbgs() << ")\n");
    653 }
    654 
    655 bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
    656                                      const PBQP::Solution &Solution,
    657                                      VirtRegMap &VRM,
    658                                      Spiller &VRegSpiller) {
    659   MachineFunction &MF = G.getMetadata().MF;
    660   LiveIntervals &LIS = G.getMetadata().LIS;
    661   const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
    662   (void)TRI;
    663 
    664   // Set to true if we have any spills
    665   bool AnotherRoundNeeded = false;
    666 
    667   // Clear the existing allocation.
    668   VRM.clearAllVirt();
    669 
    670   // Iterate over the nodes mapping the PBQP solution to a register
    671   // assignment.
    672   for (auto NId : G.nodeIds()) {
    673     unsigned VReg = G.getNodeMetadata(NId).getVReg();
    674     unsigned AllocOption = Solution.getSelection(NId);
    675 
    676     if (AllocOption != PBQP::RegAlloc::getSpillOptionIdx()) {
    677       unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1];
    678       DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> "
    679             << TRI.getName(PReg) << "\n");
    680       assert(PReg != 0 && "Invalid preg selected.");
    681       VRM.assignVirt2Phys(VReg, PReg);
    682     } else {
    683       // Spill VReg. If this introduces new intervals we'll need another round
    684       // of allocation.
    685       SmallVector<unsigned, 8> NewVRegs;
    686       spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
    687       AnotherRoundNeeded |= !NewVRegs.empty();
    688     }
    689   }
    690 
    691   return !AnotherRoundNeeded;
    692 }
    693 
    694 void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
    695                                  LiveIntervals &LIS,
    696                                  VirtRegMap &VRM) const {
    697   MachineRegisterInfo &MRI = MF.getRegInfo();
    698 
    699   // First allocate registers for the empty intervals.
    700   for (RegSet::const_iterator
    701          I = EmptyIntervalVRegs.begin(), E = EmptyIntervalVRegs.end();
    702          I != E; ++I) {
    703     LiveInterval &LI = LIS.getInterval(*I);
    704 
    705     unsigned PReg = MRI.getSimpleHint(LI.reg);
    706 
    707     if (PReg == 0) {
    708       const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg);
    709       PReg = RC.getRawAllocationOrder(MF).front();
    710     }
    711 
    712     VRM.assignVirt2Phys(LI.reg, PReg);
    713   }
    714 }
    715 
    716 static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size,
    717                                          unsigned NumInstr) {
    718   // All intervals have a spill weight that is mostly proportional to the number
    719   // of uses, with uses in loops having a bigger weight.
    720   return NumInstr * normalizeSpillWeight(UseDefFreq, Size, 1);
    721 }
    722 
    723 bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
    724   LiveIntervals &LIS = getAnalysis<LiveIntervals>();
    725   MachineBlockFrequencyInfo &MBFI =
    726     getAnalysis<MachineBlockFrequencyInfo>();
    727 
    728   VirtRegMap &VRM = getAnalysis<VirtRegMap>();
    729 
    730   calculateSpillWeightsAndHints(LIS, MF, &VRM, getAnalysis<MachineLoopInfo>(),
    731                                 MBFI, normalizePBQPSpillWeight);
    732 
    733   std::unique_ptr<Spiller> VRegSpiller(createInlineSpiller(*this, MF, VRM));
    734 
    735   MF.getRegInfo().freezeReservedRegs(MF);
    736 
    737   DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n");
    738 
    739   // Allocator main loop:
    740   //
    741   // * Map current regalloc problem to a PBQP problem
    742   // * Solve the PBQP problem
    743   // * Map the solution back to a register allocation
    744   // * Spill if necessary
    745   //
    746   // This process is continued till no more spills are generated.
    747 
    748   // Find the vreg intervals in need of allocation.
    749   findVRegIntervalsToAlloc(MF, LIS);
    750 
    751 #ifndef NDEBUG
    752   const Function &F = *MF.getFunction();
    753   std::string FullyQualifiedName =
    754     F.getParent()->getModuleIdentifier() + "." + F.getName().str();
    755 #endif
    756 
    757   // If there are non-empty intervals allocate them using pbqp.
    758   if (!VRegsToAlloc.empty()) {
    759 
    760     const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
    761     std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot =
    762       llvm::make_unique<PBQPRAConstraintList>();
    763     ConstraintsRoot->addConstraint(llvm::make_unique<SpillCosts>());
    764     ConstraintsRoot->addConstraint(llvm::make_unique<Interference>());
    765     if (PBQPCoalescing)
    766       ConstraintsRoot->addConstraint(llvm::make_unique<Coalescing>());
    767     ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints());
    768 
    769     bool PBQPAllocComplete = false;
    770     unsigned Round = 0;
    771 
    772     while (!PBQPAllocComplete) {
    773       DEBUG(dbgs() << "  PBQP Regalloc round " << Round << ":\n");
    774 
    775       PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI));
    776       initializeGraph(G, VRM, *VRegSpiller);
    777       ConstraintsRoot->apply(G);
    778 
    779 #ifndef NDEBUG
    780       if (PBQPDumpGraphs) {
    781         std::ostringstream RS;
    782         RS << Round;
    783         std::string GraphFileName = FullyQualifiedName + "." + RS.str() +
    784                                     ".pbqpgraph";
    785         std::error_code EC;
    786         raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text);
    787         DEBUG(dbgs() << "Dumping graph for round " << Round << " to \""
    788               << GraphFileName << "\"\n");
    789         G.dump(OS);
    790       }
    791 #endif
    792 
    793       PBQP::Solution Solution = PBQP::RegAlloc::solve(G);
    794       PBQPAllocComplete = mapPBQPToRegAlloc(G, Solution, VRM, *VRegSpiller);
    795       ++Round;
    796     }
    797   }
    798 
    799   // Finalise allocation, allocate empty ranges.
    800   finalizeAlloc(MF, LIS, VRM);
    801   VRegsToAlloc.clear();
    802   EmptyIntervalVRegs.clear();
    803 
    804   DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n");
    805 
    806   return true;
    807 }
    808 
    809 /// Create Printable object for node and register info.
    810 static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId,
    811                                const PBQP::RegAlloc::PBQPRAGraph &G) {
    812   return Printable([NId, &G](raw_ostream &OS) {
    813     const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
    814     const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
    815     unsigned VReg = G.getNodeMetadata(NId).getVReg();
    816     const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg));
    817     OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')';
    818   });
    819 }
    820 
    821 void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const {
    822   for (auto NId : nodeIds()) {
    823     const Vector &Costs = getNodeCosts(NId);
    824     assert(Costs.getLength() != 0 && "Empty vector in graph.");
    825     OS << PrintNodeInfo(NId, *this) << ": " << Costs << '\n';
    826   }
    827   OS << '\n';
    828 
    829   for (auto EId : edgeIds()) {
    830     NodeId N1Id = getEdgeNode1Id(EId);
    831     NodeId N2Id = getEdgeNode2Id(EId);
    832     assert(N1Id != N2Id && "PBQP graphs should not have self-edges.");
    833     const Matrix &M = getEdgeCosts(EId);
    834     assert(M.getRows() != 0 && "No rows in matrix.");
    835     assert(M.getCols() != 0 && "No cols in matrix.");
    836     OS << PrintNodeInfo(N1Id, *this) << ' ' << M.getRows() << " rows / ";
    837     OS << PrintNodeInfo(N2Id, *this) << ' ' << M.getCols() << " cols:\n";
    838     OS << M << '\n';
    839   }
    840 }
    841 
    842 void PBQP::RegAlloc::PBQPRAGraph::dump() const { dump(dbgs()); }
    843 
    844 void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream &OS) const {
    845   OS << "graph {\n";
    846   for (auto NId : nodeIds()) {
    847     OS << "  node" << NId << " [ label=\""
    848        << PrintNodeInfo(NId, *this) << "\\n"
    849        << getNodeCosts(NId) << "\" ]\n";
    850   }
    851 
    852   OS << "  edge [ len=" << nodeIds().size() << " ]\n";
    853   for (auto EId : edgeIds()) {
    854     OS << "  node" << getEdgeNode1Id(EId)
    855        << " -- node" << getEdgeNode2Id(EId)
    856        << " [ label=\"";
    857     const Matrix &EdgeCosts = getEdgeCosts(EId);
    858     for (unsigned i = 0; i < EdgeCosts.getRows(); ++i) {
    859       OS << EdgeCosts.getRowAsVector(i) << "\\n";
    860     }
    861     OS << "\" ]\n";
    862   }
    863   OS << "}\n";
    864 }
    865 
    866 FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) {
    867   return new RegAllocPBQP(customPassID);
    868 }
    869 
    870 FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
    871   return createPBQPRegisterAllocator();
    872 }
    873 
    874 #undef DEBUG_TYPE
    875