Home | History | Annotate | Download | only in Hexagon
      1 //===-- HexagonFrameLowering.cpp - Define frame lowering ------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //
      9 //===----------------------------------------------------------------------===//
     10 
     11 #define DEBUG_TYPE "hexagon-pei"
     12 
     13 #include "HexagonFrameLowering.h"
     14 #include "Hexagon.h"
     15 #include "HexagonInstrInfo.h"
     16 #include "HexagonMachineFunctionInfo.h"
     17 #include "HexagonRegisterInfo.h"
     18 #include "HexagonSubtarget.h"
     19 #include "HexagonTargetMachine.h"
     20 #include "llvm/ADT/BitVector.h"
     21 #include "llvm/ADT/PostOrderIterator.h"
     22 #include "llvm/ADT/STLExtras.h"
     23 #include "llvm/CodeGen/MachineDominators.h"
     24 #include "llvm/CodeGen/MachineInstrBuilder.h"
     25 #include "llvm/CodeGen/MachineFunction.h"
     26 #include "llvm/CodeGen/MachineFunctionPass.h"
     27 #include "llvm/CodeGen/MachineInstrBuilder.h"
     28 #include "llvm/CodeGen/MachineModuleInfo.h"
     29 #include "llvm/CodeGen/MachinePostDominators.h"
     30 #include "llvm/CodeGen/MachineRegisterInfo.h"
     31 #include "llvm/CodeGen/RegisterScavenging.h"
     32 #include "llvm/IR/Function.h"
     33 #include "llvm/IR/Type.h"
     34 #include "llvm/Support/CommandLine.h"
     35 #include "llvm/Support/Debug.h"
     36 #include "llvm/Support/raw_ostream.h"
     37 #include "llvm/Target/TargetInstrInfo.h"
     38 #include "llvm/Target/TargetMachine.h"
     39 #include "llvm/Target/TargetOptions.h"
     40 
     41 // Hexagon stack frame layout as defined by the ABI:
     42 //
     43 //                                                       Incoming arguments
     44 //                                                       passed via stack
     45 //                                                                      |
     46 //                                                                      |
     47 //        SP during function's                 FP during function's     |
     48 //    +-- runtime (top of stack)               runtime (bottom) --+     |
     49 //    |                                                           |     |
     50 // --++---------------------+------------------+-----------------++-+-------
     51 //   |  parameter area for  |  variable-size   |   fixed-size    |LR|  arg
     52 //   |   called functions   |  local objects   |  local objects  |FP|
     53 // --+----------------------+------------------+-----------------+--+-------
     54 //    <-    size known    -> <- size unknown -> <- size known  ->
     55 //
     56 // Low address                                                 High address
     57 //
     58 // <--- stack growth
     59 //
     60 //
     61 // - In any circumstances, the outgoing function arguments are always accessi-
     62 //   ble using the SP, and the incoming arguments are accessible using the FP.
     63 // - If the local objects are not aligned, they can always be accessed using
     64 //   the FP.
     65 // - If there are no variable-sized objects, the local objects can always be
     66 //   accessed using the SP, regardless whether they are aligned or not. (The
     67 //   alignment padding will be at the bottom of the stack (highest address),
     68 //   and so the offset with respect to the SP will be known at the compile-
     69 //   -time.)
     70 //
     71 // The only complication occurs if there are both, local aligned objects, and
     72 // dynamically allocated (variable-sized) objects. The alignment pad will be
     73 // placed between the FP and the local objects, thus preventing the use of the
     74 // FP to access the local objects. At the same time, the variable-sized objects
     75 // will be between the SP and the local objects, thus introducing an unknown
     76 // distance from the SP to the locals.
     77 //
     78 // To avoid this problem, a new register is created that holds the aligned
     79 // address of the bottom of the stack, referred in the sources as AP (aligned
     80 // pointer). The AP will be equal to "FP-p", where "p" is the smallest pad
     81 // that aligns AP to the required boundary (a maximum of the alignments of
     82 // all stack objects, fixed- and variable-sized). All local objects[1] will
     83 // then use AP as the base pointer.
     84 // [1] The exception is with "fixed" stack objects. "Fixed" stack objects get
     85 // their name from being allocated at fixed locations on the stack, relative
     86 // to the FP. In the presence of dynamic allocation and local alignment, such
     87 // objects can only be accessed through the FP.
     88 //
     89 // Illustration of the AP:
     90 //                                                                FP --+
     91 //                                                                     |
     92 // ---------------+---------------------+-----+-----------------------++-+--
     93 //   Rest of the  | Local stack objects | Pad |  Fixed stack objects  |LR|
     94 //   stack frame  | (aligned)           |     |  (CSR, spills, etc.)  |FP|
     95 // ---------------+---------------------+-----+-----------------+-----+--+--
     96 //                                      |<-- Multiple of the -->|
     97 //                                           stack alignment    +-- AP
     98 //
     99 // The AP is set up at the beginning of the function. Since it is not a dedi-
    100 // cated (reserved) register, it needs to be kept live throughout the function
    101 // to be available as the base register for local object accesses.
    102 // Normally, an address of a stack objects is obtained by a pseudo-instruction
    103 // TFR_FI. To access local objects with the AP register present, a different
    104 // pseudo-instruction needs to be used: TFR_FIA. The TFR_FIA takes one extra
    105 // argument compared to TFR_FI: the first input register is the AP register.
    106 // This keeps the register live between its definition and its uses.
    107 
    108 // The AP register is originally set up using pseudo-instruction ALIGNA:
    109 //   AP = ALIGNA A
    110 // where
    111 //   A  - required stack alignment
    112 // The alignment value must be the maximum of all alignments required by
    113 // any stack object.
    114 
    115 // The dynamic allocation uses a pseudo-instruction ALLOCA:
    116 //   Rd = ALLOCA Rs, A
    117 // where
    118 //   Rd - address of the allocated space
    119 //   Rs - minimum size (the actual allocated can be larger to accommodate
    120 //        alignment)
    121 //   A  - required alignment
    122 
    123 
    124 using namespace llvm;
    125 
    126 static cl::opt<bool> DisableDeallocRet("disable-hexagon-dealloc-ret",
    127     cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target"));
    128 
    129 
    130 static cl::opt<int> NumberScavengerSlots("number-scavenger-slots",
    131     cl::Hidden, cl::desc("Set the number of scavenger slots"), cl::init(2),
    132     cl::ZeroOrMore);
    133 
    134 static cl::opt<int> SpillFuncThreshold("spill-func-threshold",
    135     cl::Hidden, cl::desc("Specify O2(not Os) spill func threshold"),
    136     cl::init(6), cl::ZeroOrMore);
    137 
    138 static cl::opt<int> SpillFuncThresholdOs("spill-func-threshold-Os",
    139     cl::Hidden, cl::desc("Specify Os spill func threshold"),
    140     cl::init(1), cl::ZeroOrMore);
    141 
    142 static cl::opt<bool> EnableShrinkWrapping("hexagon-shrink-frame",
    143     cl::init(true), cl::Hidden, cl::ZeroOrMore,
    144     cl::desc("Enable stack frame shrink wrapping"));
    145 
    146 static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit", cl::init(UINT_MAX),
    147     cl::Hidden, cl::ZeroOrMore, cl::desc("Max count of stack frame "
    148     "shrink-wraps"));
    149 
    150 static cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true),
    151     cl::Hidden, cl::desc("Use allocframe more conservatively"));
    152 
    153 
    154 namespace llvm {
    155   void initializeHexagonCallFrameInformationPass(PassRegistry&);
    156   FunctionPass *createHexagonCallFrameInformation();
    157 }
    158 
    159 namespace {
    160   class HexagonCallFrameInformation : public MachineFunctionPass {
    161   public:
    162     static char ID;
    163     HexagonCallFrameInformation() : MachineFunctionPass(ID) {
    164       PassRegistry &PR = *PassRegistry::getPassRegistry();
    165       initializeHexagonCallFrameInformationPass(PR);
    166     }
    167     bool runOnMachineFunction(MachineFunction &MF) override;
    168   };
    169 
    170   char HexagonCallFrameInformation::ID = 0;
    171 }
    172 
    173 bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) {
    174   auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering();
    175   bool NeedCFI = MF.getMMI().hasDebugInfo() ||
    176                  MF.getFunction()->needsUnwindTableEntry();
    177 
    178   if (!NeedCFI)
    179     return false;
    180   HFI.insertCFIInstructions(MF);
    181   return true;
    182 }
    183 
    184 INITIALIZE_PASS(HexagonCallFrameInformation, "hexagon-cfi",
    185                 "Hexagon call frame information", false, false)
    186 
    187 FunctionPass *llvm::createHexagonCallFrameInformation() {
    188   return new HexagonCallFrameInformation();
    189 }
    190 
    191 
    192 namespace {
    193   /// Map a register pair Reg to the subregister that has the greater "number",
    194   /// i.e. D3 (aka R7:6) will be mapped to R7, etc.
    195   unsigned getMax32BitSubRegister(unsigned Reg, const TargetRegisterInfo &TRI,
    196                                   bool hireg = true) {
    197     if (Reg < Hexagon::D0 || Reg > Hexagon::D15)
    198       return Reg;
    199 
    200     unsigned RegNo = 0;
    201     for (MCSubRegIterator SubRegs(Reg, &TRI); SubRegs.isValid(); ++SubRegs) {
    202       if (hireg) {
    203         if (*SubRegs > RegNo)
    204           RegNo = *SubRegs;
    205       } else {
    206         if (!RegNo || *SubRegs < RegNo)
    207           RegNo = *SubRegs;
    208       }
    209     }
    210     return RegNo;
    211   }
    212 
    213   /// Returns the callee saved register with the largest id in the vector.
    214   unsigned getMaxCalleeSavedReg(const std::vector<CalleeSavedInfo> &CSI,
    215                                 const TargetRegisterInfo &TRI) {
    216     assert(Hexagon::R1 > 0 &&
    217            "Assume physical registers are encoded as positive integers");
    218     if (CSI.empty())
    219       return 0;
    220 
    221     unsigned Max = getMax32BitSubRegister(CSI[0].getReg(), TRI);
    222     for (unsigned I = 1, E = CSI.size(); I < E; ++I) {
    223       unsigned Reg = getMax32BitSubRegister(CSI[I].getReg(), TRI);
    224       if (Reg > Max)
    225         Max = Reg;
    226     }
    227     return Max;
    228   }
    229 
    230   /// Checks if the basic block contains any instruction that needs a stack
    231   /// frame to be already in place.
    232   bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR) {
    233     for (auto &I : MBB) {
    234       const MachineInstr *MI = &I;
    235       if (MI->isCall())
    236         return true;
    237       unsigned Opc = MI->getOpcode();
    238       switch (Opc) {
    239         case Hexagon::ALLOCA:
    240         case Hexagon::ALIGNA:
    241           return true;
    242         default:
    243           break;
    244       }
    245       // Check individual operands.
    246       for (const MachineOperand &MO : MI->operands()) {
    247         // While the presence of a frame index does not prove that a stack
    248         // frame will be required, all frame indexes should be within alloc-
    249         // frame/deallocframe. Otherwise, the code that translates a frame
    250         // index into an offset would have to be aware of the placement of
    251         // the frame creation/destruction instructions.
    252         if (MO.isFI())
    253           return true;
    254         if (!MO.isReg())
    255           continue;
    256         unsigned R = MO.getReg();
    257         // Virtual registers will need scavenging, which then may require
    258         // a stack slot.
    259         if (TargetRegisterInfo::isVirtualRegister(R))
    260           return true;
    261         if (CSR[R])
    262           return true;
    263       }
    264     }
    265     return false;
    266   }
    267 
    268   /// Returns true if MBB has a machine instructions that indicates a tail call
    269   /// in the block.
    270   bool hasTailCall(const MachineBasicBlock &MBB) {
    271     MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr();
    272     unsigned RetOpc = I->getOpcode();
    273     return RetOpc == Hexagon::TCRETURNi || RetOpc == Hexagon::TCRETURNr;
    274   }
    275 
    276   /// Returns true if MBB contains an instruction that returns.
    277   bool hasReturn(const MachineBasicBlock &MBB) {
    278     for (auto I = MBB.getFirstTerminator(), E = MBB.end(); I != E; ++I)
    279       if (I->isReturn())
    280         return true;
    281     return false;
    282   }
    283 }
    284 
    285 
    286 /// Implements shrink-wrapping of the stack frame. By default, stack frame
    287 /// is created in the function entry block, and is cleaned up in every block
    288 /// that returns. This function finds alternate blocks: one for the frame
    289 /// setup (prolog) and one for the cleanup (epilog).
    290 void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
    291       MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const {
    292   static unsigned ShrinkCounter = 0;
    293 
    294   if (ShrinkLimit.getPosition()) {
    295     if (ShrinkCounter >= ShrinkLimit)
    296       return;
    297     ShrinkCounter++;
    298   }
    299 
    300   auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
    301   auto &HRI = *HST.getRegisterInfo();
    302 
    303   MachineDominatorTree MDT;
    304   MDT.runOnMachineFunction(MF);
    305   MachinePostDominatorTree MPT;
    306   MPT.runOnMachineFunction(MF);
    307 
    308   typedef DenseMap<unsigned,unsigned> UnsignedMap;
    309   UnsignedMap RPO;
    310   typedef ReversePostOrderTraversal<const MachineFunction*> RPOTType;
    311   RPOTType RPOT(&MF);
    312   unsigned RPON = 0;
    313   for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
    314     RPO[(*I)->getNumber()] = RPON++;
    315 
    316   // Don't process functions that have loops, at least for now. Placement
    317   // of prolog and epilog must take loop structure into account. For simpli-
    318   // city don't do it right now.
    319   for (auto &I : MF) {
    320     unsigned BN = RPO[I.getNumber()];
    321     for (auto SI = I.succ_begin(), SE = I.succ_end(); SI != SE; ++SI) {
    322       // If found a back-edge, return.
    323       if (RPO[(*SI)->getNumber()] <= BN)
    324         return;
    325     }
    326   }
    327 
    328   // Collect the set of blocks that need a stack frame to execute. Scan
    329   // each block for uses/defs of callee-saved registers, calls, etc.
    330   SmallVector<MachineBasicBlock*,16> SFBlocks;
    331   BitVector CSR(Hexagon::NUM_TARGET_REGS);
    332   for (const MCPhysReg *P = HRI.getCalleeSavedRegs(&MF); *P; ++P)
    333     CSR[*P] = true;
    334 
    335   for (auto &I : MF)
    336     if (needsStackFrame(I, CSR))
    337       SFBlocks.push_back(&I);
    338 
    339   DEBUG({
    340     dbgs() << "Blocks needing SF: {";
    341     for (auto &B : SFBlocks)
    342       dbgs() << " BB#" << B->getNumber();
    343     dbgs() << " }\n";
    344   });
    345   // No frame needed?
    346   if (SFBlocks.empty())
    347     return;
    348 
    349   // Pick a common dominator and a common post-dominator.
    350   MachineBasicBlock *DomB = SFBlocks[0];
    351   for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) {
    352     DomB = MDT.findNearestCommonDominator(DomB, SFBlocks[i]);
    353     if (!DomB)
    354       break;
    355   }
    356   MachineBasicBlock *PDomB = SFBlocks[0];
    357   for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) {
    358     PDomB = MPT.findNearestCommonDominator(PDomB, SFBlocks[i]);
    359     if (!PDomB)
    360       break;
    361   }
    362   DEBUG({
    363     dbgs() << "Computed dom block: BB#";
    364     if (DomB) dbgs() << DomB->getNumber();
    365     else      dbgs() << "<null>";
    366     dbgs() << ", computed pdom block: BB#";
    367     if (PDomB) dbgs() << PDomB->getNumber();
    368     else       dbgs() << "<null>";
    369     dbgs() << "\n";
    370   });
    371   if (!DomB || !PDomB)
    372     return;
    373 
    374   // Make sure that DomB dominates PDomB and PDomB post-dominates DomB.
    375   if (!MDT.dominates(DomB, PDomB)) {
    376     DEBUG(dbgs() << "Dom block does not dominate pdom block\n");
    377     return;
    378   }
    379   if (!MPT.dominates(PDomB, DomB)) {
    380     DEBUG(dbgs() << "PDom block does not post-dominate dom block\n");
    381     return;
    382   }
    383 
    384   // Finally, everything seems right.
    385   PrologB = DomB;
    386   EpilogB = PDomB;
    387 }
    388 
    389 /// Perform most of the PEI work here:
    390 /// - saving/restoring of the callee-saved registers,
    391 /// - stack frame creation and destruction.
    392 /// Normally, this work is distributed among various functions, but doing it
    393 /// in one place allows shrink-wrapping of the stack frame.
    394 void HexagonFrameLowering::emitPrologue(MachineFunction &MF,
    395                                         MachineBasicBlock &MBB) const {
    396   auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
    397   auto &HRI = *HST.getRegisterInfo();
    398 
    399   assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
    400   MachineFrameInfo *MFI = MF.getFrameInfo();
    401   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
    402 
    403   MachineBasicBlock *PrologB = &MF.front(), *EpilogB = nullptr;
    404   if (EnableShrinkWrapping)
    405     findShrunkPrologEpilog(MF, PrologB, EpilogB);
    406 
    407   insertCSRSpillsInBlock(*PrologB, CSI, HRI);
    408   insertPrologueInBlock(*PrologB);
    409 
    410   if (EpilogB) {
    411     insertCSRRestoresInBlock(*EpilogB, CSI, HRI);
    412     insertEpilogueInBlock(*EpilogB);
    413   } else {
    414     for (auto &B : MF)
    415       if (B.isReturnBlock())
    416         insertCSRRestoresInBlock(B, CSI, HRI);
    417 
    418     for (auto &B : MF)
    419       if (B.isReturnBlock())
    420         insertEpilogueInBlock(B);
    421   }
    422 }
    423 
    424 
    425 void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const {
    426   MachineFunction &MF = *MBB.getParent();
    427   MachineFrameInfo *MFI = MF.getFrameInfo();
    428   auto &HST = MF.getSubtarget<HexagonSubtarget>();
    429   auto &HII = *HST.getInstrInfo();
    430   auto &HRI = *HST.getRegisterInfo();
    431   DebugLoc dl;
    432 
    433   unsigned MaxAlign = std::max(MFI->getMaxAlignment(), getStackAlignment());
    434 
    435   // Calculate the total stack frame size.
    436   // Get the number of bytes to allocate from the FrameInfo.
    437   unsigned FrameSize = MFI->getStackSize();
    438   // Round up the max call frame size to the max alignment on the stack.
    439   unsigned MaxCFA = RoundUpToAlignment(MFI->getMaxCallFrameSize(), MaxAlign);
    440   MFI->setMaxCallFrameSize(MaxCFA);
    441 
    442   FrameSize = MaxCFA + RoundUpToAlignment(FrameSize, MaxAlign);
    443   MFI->setStackSize(FrameSize);
    444 
    445   bool AlignStack = (MaxAlign > getStackAlignment());
    446 
    447   // Get the number of bytes to allocate from the FrameInfo.
    448   unsigned NumBytes = MFI->getStackSize();
    449   unsigned SP = HRI.getStackRegister();
    450   unsigned MaxCF = MFI->getMaxCallFrameSize();
    451   MachineBasicBlock::iterator InsertPt = MBB.begin();
    452 
    453   auto *FuncInfo = MF.getInfo<HexagonMachineFunctionInfo>();
    454   auto &AdjustRegs = FuncInfo->getAllocaAdjustInsts();
    455 
    456   for (auto MI : AdjustRegs) {
    457     assert((MI->getOpcode() == Hexagon::ALLOCA) && "Expected alloca");
    458     expandAlloca(MI, HII, SP, MaxCF);
    459     MI->eraseFromParent();
    460   }
    461 
    462   if (!hasFP(MF))
    463     return;
    464 
    465   // Check for overflow.
    466   // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
    467   const unsigned int ALLOCFRAME_MAX = 16384;
    468 
    469   // Create a dummy memory operand to avoid allocframe from being treated as
    470   // a volatile memory reference.
    471   MachineMemOperand *MMO =
    472     MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
    473                             4, 4);
    474 
    475   if (NumBytes >= ALLOCFRAME_MAX) {
    476     // Emit allocframe(#0).
    477     BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
    478       .addImm(0)
    479       .addMemOperand(MMO);
    480 
    481     // Subtract offset from frame pointer.
    482     // We use a caller-saved non-parameter register for that.
    483     unsigned CallerSavedReg = HRI.getFirstCallerSavedNonParamReg();
    484     BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CONST32_Int_Real),
    485             CallerSavedReg).addImm(NumBytes);
    486     BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_sub), SP)
    487       .addReg(SP)
    488       .addReg(CallerSavedReg);
    489   } else {
    490     BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
    491       .addImm(NumBytes)
    492       .addMemOperand(MMO);
    493   }
    494 
    495   if (AlignStack) {
    496     BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP)
    497         .addReg(SP)
    498         .addImm(-int64_t(MaxAlign));
    499   }
    500 }
    501 
    502 void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
    503   MachineFunction &MF = *MBB.getParent();
    504   if (!hasFP(MF))
    505     return;
    506 
    507   auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
    508   auto &HII = *HST.getInstrInfo();
    509   auto &HRI = *HST.getRegisterInfo();
    510   unsigned SP = HRI.getStackRegister();
    511 
    512   MachineInstr *RetI = nullptr;
    513   for (auto &I : MBB) {
    514     if (!I.isReturn())
    515       continue;
    516     RetI = &I;
    517     break;
    518   }
    519   unsigned RetOpc = RetI ? RetI->getOpcode() : 0;
    520 
    521   MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
    522   DebugLoc DL;
    523   if (InsertPt != MBB.end())
    524     DL = InsertPt->getDebugLoc();
    525   else if (!MBB.empty())
    526     DL = std::prev(MBB.end())->getDebugLoc();
    527 
    528   // Handle EH_RETURN.
    529   if (RetOpc == Hexagon::EH_RETURN_JMPR) {
    530     BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe));
    531     BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::A2_add), SP)
    532         .addReg(SP)
    533         .addReg(Hexagon::R28);
    534     return;
    535   }
    536 
    537   // Check for RESTORE_DEALLOC_RET* tail call. Don't emit an extra dealloc-
    538   // frame instruction if we encounter it.
    539   if (RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4) {
    540     MachineBasicBlock::iterator It = RetI;
    541     ++It;
    542     // Delete all instructions after the RESTORE (except labels).
    543     while (It != MBB.end()) {
    544       if (!It->isLabel())
    545         It = MBB.erase(It);
    546       else
    547         ++It;
    548     }
    549     return;
    550   }
    551 
    552   // It is possible that the restoring code is a call to a library function.
    553   // All of the restore* functions include "deallocframe", so we need to make
    554   // sure that we don't add an extra one.
    555   bool NeedsDeallocframe = true;
    556   if (!MBB.empty() && InsertPt != MBB.begin()) {
    557     MachineBasicBlock::iterator PrevIt = std::prev(InsertPt);
    558     unsigned COpc = PrevIt->getOpcode();
    559     if (COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4)
    560       NeedsDeallocframe = false;
    561   }
    562 
    563   if (!NeedsDeallocframe)
    564     return;
    565   // If the returning instruction is JMPret, replace it with dealloc_return,
    566   // otherwise just add deallocframe. The function could be returning via a
    567   // tail call.
    568   if (RetOpc != Hexagon::JMPret || DisableDeallocRet) {
    569     BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe));
    570     return;
    571   }
    572   unsigned NewOpc = Hexagon::L4_return;
    573   MachineInstr *NewI = BuildMI(MBB, RetI, DL, HII.get(NewOpc));
    574   // Transfer the function live-out registers.
    575   NewI->copyImplicitOps(MF, RetI);
    576   MBB.erase(RetI);
    577 }
    578 
    579 
    580 namespace {
    581   bool IsAllocFrame(MachineBasicBlock::const_iterator It) {
    582     if (!It->isBundle())
    583       return It->getOpcode() == Hexagon::S2_allocframe;
    584     auto End = It->getParent()->instr_end();
    585     MachineBasicBlock::const_instr_iterator I = It.getInstrIterator();
    586     while (++I != End && I->isBundled())
    587       if (I->getOpcode() == Hexagon::S2_allocframe)
    588         return true;
    589     return false;
    590   }
    591 
    592   MachineBasicBlock::iterator FindAllocFrame(MachineBasicBlock &B) {
    593     for (auto &I : B)
    594       if (IsAllocFrame(I))
    595         return I;
    596     return B.end();
    597   }
    598 }
    599 
    600 
    601 void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const {
    602   for (auto &B : MF) {
    603     auto AF = FindAllocFrame(B);
    604     if (AF == B.end())
    605       continue;
    606     insertCFIInstructionsAt(B, ++AF);
    607   }
    608 }
    609 
    610 
    611 void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
    612       MachineBasicBlock::iterator At) const {
    613   MachineFunction &MF = *MBB.getParent();
    614   MachineFrameInfo *MFI = MF.getFrameInfo();
    615   MachineModuleInfo &MMI = MF.getMMI();
    616   auto &HST = MF.getSubtarget<HexagonSubtarget>();
    617   auto &HII = *HST.getInstrInfo();
    618   auto &HRI = *HST.getRegisterInfo();
    619 
    620   // If CFI instructions have debug information attached, something goes
    621   // wrong with the final assembly generation: the prolog_end is placed
    622   // in a wrong location.
    623   DebugLoc DL;
    624   const MCInstrDesc &CFID = HII.get(TargetOpcode::CFI_INSTRUCTION);
    625 
    626   MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
    627 
    628   if (hasFP(MF)) {
    629     unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true);
    630     unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true);
    631 
    632     // Define CFA via an offset from the value of FP.
    633     //
    634     //  -8   -4    0 (SP)
    635     // --+----+----+---------------------
    636     //   | FP | LR |          increasing addresses -->
    637     // --+----+----+---------------------
    638     //   |         +-- Old SP (before allocframe)
    639     //   +-- New FP (after allocframe)
    640     //
    641     // MCCFIInstruction::createDefCfa subtracts the offset from the register.
    642     // MCCFIInstruction::createOffset takes the offset without sign change.
    643     auto DefCfa = MCCFIInstruction::createDefCfa(FrameLabel, DwFPReg, -8);
    644     BuildMI(MBB, At, DL, CFID)
    645         .addCFIIndex(MMI.addFrameInst(DefCfa));
    646     // R31 (return addr) = CFA - 4
    647     auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4);
    648     BuildMI(MBB, At, DL, CFID)
    649         .addCFIIndex(MMI.addFrameInst(OffR31));
    650     // R30 (frame ptr) = CFA - 8
    651     auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8);
    652     BuildMI(MBB, At, DL, CFID)
    653         .addCFIIndex(MMI.addFrameInst(OffR30));
    654   }
    655 
    656   static unsigned int RegsToMove[] = {
    657     Hexagon::R1,  Hexagon::R0,  Hexagon::R3,  Hexagon::R2,
    658     Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18,
    659     Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22,
    660     Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26,
    661     Hexagon::D0,  Hexagon::D1,  Hexagon::D8,  Hexagon::D9,
    662     Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13,
    663     Hexagon::NoRegister
    664   };
    665 
    666   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
    667 
    668   for (unsigned i = 0; RegsToMove[i] != Hexagon::NoRegister; ++i) {
    669     unsigned Reg = RegsToMove[i];
    670     auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool {
    671       return C.getReg() == Reg;
    672     };
    673     auto F = std::find_if(CSI.begin(), CSI.end(), IfR);
    674     if (F == CSI.end())
    675       continue;
    676 
    677     // Subtract 8 to make room for R30 and R31, which are added above.
    678     unsigned FrameReg;
    679     int64_t Offset = getFrameIndexReference(MF, F->getFrameIdx(), FrameReg) - 8;
    680 
    681     if (Reg < Hexagon::D0 || Reg > Hexagon::D15) {
    682       unsigned DwarfReg = HRI.getDwarfRegNum(Reg, true);
    683       auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg,
    684                                                    Offset);
    685       BuildMI(MBB, At, DL, CFID)
    686           .addCFIIndex(MMI.addFrameInst(OffReg));
    687     } else {
    688       // Split the double regs into subregs, and generate appropriate
    689       // cfi_offsets.
    690       // The only reason, we are split double regs is, llvm-mc does not
    691       // understand paired registers for cfi_offset.
    692       // Eg .cfi_offset r1:0, -64
    693 
    694       unsigned HiReg = HRI.getSubReg(Reg, Hexagon::subreg_hireg);
    695       unsigned LoReg = HRI.getSubReg(Reg, Hexagon::subreg_loreg);
    696       unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true);
    697       unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true);
    698       auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg,
    699                                                   Offset+4);
    700       BuildMI(MBB, At, DL, CFID)
    701           .addCFIIndex(MMI.addFrameInst(OffHi));
    702       auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg,
    703                                                   Offset);
    704       BuildMI(MBB, At, DL, CFID)
    705           .addCFIIndex(MMI.addFrameInst(OffLo));
    706     }
    707   }
    708 }
    709 
    710 
    711 bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
    712   auto &MFI = *MF.getFrameInfo();
    713   auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
    714 
    715   bool HasFixed = MFI.getNumFixedObjects();
    716   bool HasPrealloc = const_cast<MachineFrameInfo&>(MFI)
    717                         .getLocalFrameObjectCount();
    718   bool HasExtraAlign = HRI.needsStackRealignment(MF);
    719   bool HasAlloca = MFI.hasVarSizedObjects();
    720 
    721   // Insert ALLOCFRAME if we need to or at -O0 for the debugger.  Think
    722   // that this shouldn't be required, but doing so now because gcc does and
    723   // gdb can't break at the start of the function without it.  Will remove if
    724   // this turns out to be a gdb bug.
    725   //
    726   if (MF.getTarget().getOptLevel() == CodeGenOpt::None)
    727     return true;
    728 
    729   // By default we want to use SP (since it's always there). FP requires
    730   // some setup (i.e. ALLOCFRAME).
    731   // Fixed and preallocated objects need FP if the distance from them to
    732   // the SP is unknown (as is with alloca or aligna).
    733   if ((HasFixed || HasPrealloc) && (HasAlloca || HasExtraAlign))
    734     return true;
    735 
    736   if (MFI.getStackSize() > 0) {
    737     if (UseAllocframe)
    738       return true;
    739   }
    740 
    741   if (MFI.hasCalls() ||
    742       MF.getInfo<HexagonMachineFunctionInfo>()->hasClobberLR())
    743     return true;
    744 
    745   return false;
    746 }
    747 
    748 
    749 enum SpillKind {
    750   SK_ToMem,
    751   SK_FromMem,
    752   SK_FromMemTailcall
    753 };
    754 
    755 static const char *
    756 getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType) {
    757   const char * V4SpillToMemoryFunctions[] = {
    758     "__save_r16_through_r17",
    759     "__save_r16_through_r19",
    760     "__save_r16_through_r21",
    761     "__save_r16_through_r23",
    762     "__save_r16_through_r25",
    763     "__save_r16_through_r27" };
    764 
    765   const char * V4SpillFromMemoryFunctions[] = {
    766     "__restore_r16_through_r17_and_deallocframe",
    767     "__restore_r16_through_r19_and_deallocframe",
    768     "__restore_r16_through_r21_and_deallocframe",
    769     "__restore_r16_through_r23_and_deallocframe",
    770     "__restore_r16_through_r25_and_deallocframe",
    771     "__restore_r16_through_r27_and_deallocframe" };
    772 
    773   const char * V4SpillFromMemoryTailcallFunctions[] = {
    774     "__restore_r16_through_r17_and_deallocframe_before_tailcall",
    775     "__restore_r16_through_r19_and_deallocframe_before_tailcall",
    776     "__restore_r16_through_r21_and_deallocframe_before_tailcall",
    777     "__restore_r16_through_r23_and_deallocframe_before_tailcall",
    778     "__restore_r16_through_r25_and_deallocframe_before_tailcall",
    779     "__restore_r16_through_r27_and_deallocframe_before_tailcall"
    780   };
    781 
    782   const char **SpillFunc = nullptr;
    783 
    784   switch(SpillType) {
    785   case SK_ToMem:
    786     SpillFunc = V4SpillToMemoryFunctions;
    787     break;
    788   case SK_FromMem:
    789     SpillFunc = V4SpillFromMemoryFunctions;
    790     break;
    791   case SK_FromMemTailcall:
    792     SpillFunc = V4SpillFromMemoryTailcallFunctions;
    793     break;
    794   }
    795   assert(SpillFunc && "Unknown spill kind");
    796 
    797   // Spill all callee-saved registers up to the highest register used.
    798   switch (MaxReg) {
    799   case Hexagon::R17:
    800     return SpillFunc[0];
    801   case Hexagon::R19:
    802     return SpillFunc[1];
    803   case Hexagon::R21:
    804     return SpillFunc[2];
    805   case Hexagon::R23:
    806     return SpillFunc[3];
    807   case Hexagon::R25:
    808     return SpillFunc[4];
    809   case Hexagon::R27:
    810     return SpillFunc[5];
    811   default:
    812     llvm_unreachable("Unhandled maximum callee save register");
    813   }
    814   return 0;
    815 }
    816 
    817 /// Adds all callee-saved registers up to MaxReg to the instruction.
    818 static void addCalleeSaveRegistersAsImpOperand(MachineInstr *Inst,
    819                                            unsigned MaxReg, bool IsDef) {
    820   // Add the callee-saved registers as implicit uses.
    821   for (unsigned R = Hexagon::R16; R <= MaxReg; ++R) {
    822     MachineOperand ImpUse = MachineOperand::CreateReg(R, IsDef, true);
    823     Inst->addOperand(ImpUse);
    824   }
    825 }
    826 
    827 
    828 int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
    829       int FI, unsigned &FrameReg) const {
    830   auto &MFI = *MF.getFrameInfo();
    831   auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
    832 
    833   // Large parts of this code are shared with HRI::eliminateFrameIndex.
    834   int Offset = MFI.getObjectOffset(FI);
    835   bool HasAlloca = MFI.hasVarSizedObjects();
    836   bool HasExtraAlign = HRI.needsStackRealignment(MF);
    837   bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None;
    838 
    839   unsigned SP = HRI.getStackRegister(), FP = HRI.getFrameRegister();
    840   unsigned AP = 0;
    841   if (const MachineInstr *AI = getAlignaInstr(MF))
    842     AP = AI->getOperand(0).getReg();
    843   unsigned FrameSize = MFI.getStackSize();
    844 
    845   bool UseFP = false, UseAP = false;  // Default: use SP (except at -O0).
    846   // Use FP at -O0, except when there are objects with extra alignment.
    847   // That additional alignment requirement may cause a pad to be inserted,
    848   // which will make it impossible to use FP to access objects located
    849   // past the pad.
    850   if (NoOpt && !HasExtraAlign)
    851     UseFP = true;
    852   if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) {
    853     // Fixed and preallocated objects will be located before any padding
    854     // so FP must be used to access them.
    855     UseFP |= (HasAlloca || HasExtraAlign);
    856   } else {
    857     if (HasAlloca) {
    858       if (HasExtraAlign)
    859         UseAP = true;
    860       else
    861         UseFP = true;
    862     }
    863   }
    864 
    865   // If FP was picked, then there had better be FP.
    866   bool HasFP = hasFP(MF);
    867   assert((HasFP || !UseFP) && "This function must have frame pointer");
    868 
    869   // Having FP implies allocframe. Allocframe will store extra 8 bytes:
    870   // FP/LR. If the base register is used to access an object across these
    871   // 8 bytes, then the offset will need to be adjusted by 8.
    872   //
    873   // After allocframe:
    874   //                    HexagonISelLowering adds 8 to ---+
    875   //                    the offsets of all stack-based   |
    876   //                    arguments (*)                    |
    877   //                                                     |
    878   //   getObjectOffset < 0   0     8  getObjectOffset >= 8
    879   // ------------------------+-----+------------------------> increasing
    880   //     <local objects>     |FP/LR|    <input arguments>     addresses
    881   // -----------------+------+-----+------------------------>
    882   //                  |      |
    883   //    SP/AP point --+      +-- FP points here (**)
    884   //    somewhere on
    885   //    this side of FP/LR
    886   //
    887   // (*) See LowerFormalArguments. The FP/LR is assumed to be present.
    888   // (**) *FP == old-FP. FP+0..7 are the bytes of FP/LR.
    889 
    890   // The lowering assumes that FP/LR is present, and so the offsets of
    891   // the formal arguments start at 8. If FP/LR is not there we need to
    892   // reduce the offset by 8.
    893   if (Offset > 0 && !HasFP)
    894     Offset -= 8;
    895 
    896   if (UseFP)
    897     FrameReg = FP;
    898   else if (UseAP)
    899     FrameReg = AP;
    900   else
    901     FrameReg = SP;
    902 
    903   // Calculate the actual offset in the instruction. If there is no FP
    904   // (in other words, no allocframe), then SP will not be adjusted (i.e.
    905   // there will be no SP -= FrameSize), so the frame size should not be
    906   // added to the calculated offset.
    907   int RealOffset = Offset;
    908   if (!UseFP && !UseAP && HasFP)
    909     RealOffset = FrameSize+Offset;
    910   return RealOffset;
    911 }
    912 
    913 
    914 bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
    915       const CSIVect &CSI, const HexagonRegisterInfo &HRI) const {
    916   if (CSI.empty())
    917     return true;
    918 
    919   MachineBasicBlock::iterator MI = MBB.begin();
    920   MachineFunction &MF = *MBB.getParent();
    921   auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
    922 
    923   if (useSpillFunction(MF, CSI)) {
    924     unsigned MaxReg = getMaxCalleeSavedReg(CSI, HRI);
    925     const char *SpillFun = getSpillFunctionFor(MaxReg, SK_ToMem);
    926     // Call spill function.
    927     DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
    928     MachineInstr *SaveRegsCall =
    929         BuildMI(MBB, MI, DL, HII.get(Hexagon::SAVE_REGISTERS_CALL_V4))
    930           .addExternalSymbol(SpillFun);
    931     // Add callee-saved registers as use.
    932     addCalleeSaveRegistersAsImpOperand(SaveRegsCall, MaxReg, false);
    933     // Add live in registers.
    934     for (unsigned I = 0; I < CSI.size(); ++I)
    935       MBB.addLiveIn(CSI[I].getReg());
    936     return true;
    937   }
    938 
    939   for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
    940     unsigned Reg = CSI[i].getReg();
    941     // Add live in registers. We treat eh_return callee saved register r0 - r3
    942     // specially. They are not really callee saved registers as they are not
    943     // supposed to be killed.
    944     bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg);
    945     int FI = CSI[i].getFrameIdx();
    946     const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
    947     HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI);
    948     if (IsKill)
    949       MBB.addLiveIn(Reg);
    950   }
    951   return true;
    952 }
    953 
    954 
    955 bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
    956       const CSIVect &CSI, const HexagonRegisterInfo &HRI) const {
    957   if (CSI.empty())
    958     return false;
    959 
    960   MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
    961   MachineFunction &MF = *MBB.getParent();
    962   auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
    963 
    964   if (useRestoreFunction(MF, CSI)) {
    965     bool HasTC = hasTailCall(MBB) || !hasReturn(MBB);
    966     unsigned MaxR = getMaxCalleeSavedReg(CSI, HRI);
    967     SpillKind Kind = HasTC ? SK_FromMemTailcall : SK_FromMem;
    968     const char *RestoreFn = getSpillFunctionFor(MaxR, Kind);
    969 
    970     // Call spill function.
    971     DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc()
    972                                   : MBB.getLastNonDebugInstr()->getDebugLoc();
    973     MachineInstr *DeallocCall = nullptr;
    974 
    975     if (HasTC) {
    976       unsigned ROpc = Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4;
    977       DeallocCall = BuildMI(MBB, MI, DL, HII.get(ROpc))
    978           .addExternalSymbol(RestoreFn);
    979     } else {
    980       // The block has a return.
    981       MachineBasicBlock::iterator It = MBB.getFirstTerminator();
    982       assert(It->isReturn() && std::next(It) == MBB.end());
    983       unsigned ROpc = Hexagon::RESTORE_DEALLOC_RET_JMP_V4;
    984       DeallocCall = BuildMI(MBB, It, DL, HII.get(ROpc))
    985           .addExternalSymbol(RestoreFn);
    986       // Transfer the function live-out registers.
    987       DeallocCall->copyImplicitOps(MF, It);
    988     }
    989     addCalleeSaveRegistersAsImpOperand(DeallocCall, MaxR, true);
    990     return true;
    991   }
    992 
    993   for (unsigned i = 0; i < CSI.size(); ++i) {
    994     unsigned Reg = CSI[i].getReg();
    995     const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
    996     int FI = CSI[i].getFrameIdx();
    997     HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI);
    998   }
    999   return true;
   1000 }
   1001 
   1002 
   1003 void HexagonFrameLowering::eliminateCallFramePseudoInstr(MachineFunction &MF,
   1004       MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
   1005   MachineInstr &MI = *I;
   1006   unsigned Opc = MI.getOpcode();
   1007   (void)Opc; // Silence compiler warning.
   1008   assert((Opc == Hexagon::ADJCALLSTACKDOWN || Opc == Hexagon::ADJCALLSTACKUP) &&
   1009          "Cannot handle this call frame pseudo instruction");
   1010   MBB.erase(I);
   1011 }
   1012 
   1013 
   1014 void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
   1015     MachineFunction &MF, RegScavenger *RS) const {
   1016   // If this function has uses aligned stack and also has variable sized stack
   1017   // objects, then we need to map all spill slots to fixed positions, so that
   1018   // they can be accessed through FP. Otherwise they would have to be accessed
   1019   // via AP, which may not be available at the particular place in the program.
   1020   MachineFrameInfo *MFI = MF.getFrameInfo();
   1021   bool HasAlloca = MFI->hasVarSizedObjects();
   1022   bool NeedsAlign = (MFI->getMaxAlignment() > getStackAlignment());
   1023 
   1024   if (!HasAlloca || !NeedsAlign)
   1025     return;
   1026 
   1027   unsigned LFS = MFI->getLocalFrameSize();
   1028   int Offset = -LFS;
   1029   for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
   1030     if (!MFI->isSpillSlotObjectIndex(i) || MFI->isDeadObjectIndex(i))
   1031       continue;
   1032     int S = MFI->getObjectSize(i);
   1033     LFS += S;
   1034     Offset -= S;
   1035     MFI->mapLocalFrameObject(i, Offset);
   1036   }
   1037 
   1038   MFI->setLocalFrameSize(LFS);
   1039   unsigned A = MFI->getLocalFrameMaxAlign();
   1040   assert(A <= 8 && "Unexpected local frame alignment");
   1041   if (A == 0)
   1042     MFI->setLocalFrameMaxAlign(8);
   1043   MFI->setUseLocalStackAllocationBlock(true);
   1044 }
   1045 
   1046 /// Returns true if there is no caller saved registers available.
   1047 static bool needToReserveScavengingSpillSlots(MachineFunction &MF,
   1048                                               const HexagonRegisterInfo &HRI) {
   1049   MachineRegisterInfo &MRI = MF.getRegInfo();
   1050   const MCPhysReg *CallerSavedRegs = HRI.getCallerSavedRegs(&MF);
   1051   // Check for an unused caller-saved register.
   1052   for ( ; *CallerSavedRegs; ++CallerSavedRegs) {
   1053     MCPhysReg FreeReg = *CallerSavedRegs;
   1054     if (!MRI.reg_nodbg_empty(FreeReg))
   1055       continue;
   1056 
   1057     // Check aliased register usage.
   1058     bool IsCurrentRegUsed = false;
   1059     for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI)
   1060       if (!MRI.reg_nodbg_empty(*AI)) {
   1061         IsCurrentRegUsed = true;
   1062         break;
   1063       }
   1064     if (IsCurrentRegUsed)
   1065       continue;
   1066 
   1067     // Neither directly used nor used through an aliased register.
   1068     return false;
   1069   }
   1070   // All caller-saved registers are used.
   1071   return true;
   1072 }
   1073 
   1074 
   1075 /// Replaces the predicate spill code pseudo instructions by valid instructions.
   1076 bool HexagonFrameLowering::replacePredRegPseudoSpillCode(MachineFunction &MF)
   1077       const {
   1078   auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
   1079   auto &HII = *HST.getInstrInfo();
   1080   MachineRegisterInfo &MRI = MF.getRegInfo();
   1081   bool HasReplacedPseudoInst = false;
   1082   // Replace predicate spill pseudo instructions by real code.
   1083   // Loop over all of the basic blocks.
   1084   for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
   1085        MBBb != MBBe; ++MBBb) {
   1086     MachineBasicBlock *MBB = &*MBBb;
   1087     // Traverse the basic block.
   1088     MachineBasicBlock::iterator NextII;
   1089     for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
   1090          MII = NextII) {
   1091       MachineInstr *MI = MII;
   1092       NextII = std::next(MII);
   1093       int Opc = MI->getOpcode();
   1094       if (Opc == Hexagon::STriw_pred) {
   1095         HasReplacedPseudoInst = true;
   1096         // STriw_pred FI, 0, SrcReg;
   1097         unsigned VirtReg = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
   1098         unsigned SrcReg = MI->getOperand(2).getReg();
   1099         bool IsOrigSrcRegKilled = MI->getOperand(2).isKill();
   1100 
   1101         assert(MI->getOperand(0).isFI() && "Expect a frame index");
   1102         assert(Hexagon::PredRegsRegClass.contains(SrcReg) &&
   1103                "Not a predicate register");
   1104 
   1105         // Insert transfer to general purpose register.
   1106         //   VirtReg = C2_tfrpr SrcPredReg
   1107         BuildMI(*MBB, MII, MI->getDebugLoc(), HII.get(Hexagon::C2_tfrpr),
   1108                 VirtReg).addReg(SrcReg, getKillRegState(IsOrigSrcRegKilled));
   1109 
   1110         // Change instruction to S2_storeri_io.
   1111         //   S2_storeri_io FI, 0, VirtReg
   1112         MI->setDesc(HII.get(Hexagon::S2_storeri_io));
   1113         MI->getOperand(2).setReg(VirtReg);
   1114         MI->getOperand(2).setIsKill();
   1115 
   1116       } else if (Opc == Hexagon::LDriw_pred) {
   1117         // DstReg = LDriw_pred FI, 0
   1118         MachineOperand &M0 = MI->getOperand(0);
   1119         if (M0.isDead()) {
   1120           MBB->erase(MII);
   1121           continue;
   1122         }
   1123 
   1124         unsigned VirtReg = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
   1125         unsigned DestReg = MI->getOperand(0).getReg();
   1126 
   1127         assert(MI->getOperand(1).isFI() && "Expect a frame index");
   1128         assert(Hexagon::PredRegsRegClass.contains(DestReg) &&
   1129                "Not a predicate register");
   1130 
   1131         // Change instruction to L2_loadri_io.
   1132         //   VirtReg = L2_loadri_io FI, 0
   1133         MI->setDesc(HII.get(Hexagon::L2_loadri_io));
   1134         MI->getOperand(0).setReg(VirtReg);
   1135 
   1136         // Insert transfer to general purpose register.
   1137         //   DestReg = C2_tfrrp VirtReg
   1138         const MCInstrDesc &D = HII.get(Hexagon::C2_tfrrp);
   1139         BuildMI(*MBB, std::next(MII), MI->getDebugLoc(), D, DestReg)
   1140           .addReg(VirtReg, getKillRegState(true));
   1141         HasReplacedPseudoInst = true;
   1142       }
   1143     }
   1144   }
   1145   return HasReplacedPseudoInst;
   1146 }
   1147 
   1148 
   1149 void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,
   1150                                                 BitVector &SavedRegs,
   1151                                                 RegScavenger *RS) const {
   1152   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
   1153 
   1154   auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
   1155   auto &HRI = *HST.getRegisterInfo();
   1156 
   1157   bool HasEHReturn = MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn();
   1158 
   1159   // If we have a function containing __builtin_eh_return we want to spill and
   1160   // restore all callee saved registers. Pretend that they are used.
   1161   if (HasEHReturn) {
   1162     for (const MCPhysReg *CSRegs = HRI.getCalleeSavedRegs(&MF); *CSRegs;
   1163          ++CSRegs)
   1164       SavedRegs.set(*CSRegs);
   1165   }
   1166 
   1167   const TargetRegisterClass &RC = Hexagon::IntRegsRegClass;
   1168 
   1169   // Replace predicate register pseudo spill code.
   1170   bool HasReplacedPseudoInst = replacePredRegPseudoSpillCode(MF);
   1171 
   1172   // We need to reserve a a spill slot if scavenging could potentially require
   1173   // spilling a scavenged register.
   1174   if (HasReplacedPseudoInst && needToReserveScavengingSpillSlots(MF, HRI)) {
   1175     MachineFrameInfo *MFI = MF.getFrameInfo();
   1176     for (int i=0; i < NumberScavengerSlots; i++)
   1177       RS->addScavengingFrameIndex(
   1178         MFI->CreateSpillStackObject(RC.getSize(), RC.getAlignment()));
   1179   }
   1180 }
   1181 
   1182 
   1183 #ifndef NDEBUG
   1184 static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) {
   1185   dbgs() << '{';
   1186   for (int x = Regs.find_first(); x >= 0; x = Regs.find_next(x)) {
   1187     unsigned R = x;
   1188     dbgs() << ' ' << PrintReg(R, &TRI);
   1189   }
   1190   dbgs() << " }";
   1191 }
   1192 #endif
   1193 
   1194 
   1195 bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
   1196       const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const {
   1197   DEBUG(dbgs() << LLVM_FUNCTION_NAME << " on "
   1198                << MF.getFunction()->getName() << '\n');
   1199   MachineFrameInfo *MFI = MF.getFrameInfo();
   1200   BitVector SRegs(Hexagon::NUM_TARGET_REGS);
   1201 
   1202   // Generate a set of unique, callee-saved registers (SRegs), where each
   1203   // register in the set is maximal in terms of sub-/super-register relation,
   1204   // i.e. for each R in SRegs, no proper super-register of R is also in SRegs.
   1205 
   1206   // (1) For each callee-saved register, add that register and all of its
   1207   // sub-registers to SRegs.
   1208   DEBUG(dbgs() << "Initial CS registers: {");
   1209   for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
   1210     unsigned R = CSI[i].getReg();
   1211     DEBUG(dbgs() << ' ' << PrintReg(R, TRI));
   1212     for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
   1213       SRegs[*SR] = true;
   1214   }
   1215   DEBUG(dbgs() << " }\n");
   1216   DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
   1217 
   1218   // (2) For each reserved register, remove that register and all of its
   1219   // sub- and super-registers from SRegs.
   1220   BitVector Reserved = TRI->getReservedRegs(MF);
   1221   for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x)) {
   1222     unsigned R = x;
   1223     for (MCSuperRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
   1224       SRegs[*SR] = false;
   1225   }
   1226   DEBUG(dbgs() << "Res:     "; dump_registers(Reserved, *TRI); dbgs() << "\n");
   1227   DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
   1228 
   1229   // (3) Collect all registers that have at least one sub-register in SRegs,
   1230   // and also have no sub-registers that are reserved. These will be the can-
   1231   // didates for saving as a whole instead of their individual sub-registers.
   1232   // (Saving R17:16 instead of R16 is fine, but only if R17 was not reserved.)
   1233   BitVector TmpSup(Hexagon::NUM_TARGET_REGS);
   1234   for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
   1235     unsigned R = x;
   1236     for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR)
   1237       TmpSup[*SR] = true;
   1238   }
   1239   for (int x = TmpSup.find_first(); x >= 0; x = TmpSup.find_next(x)) {
   1240     unsigned R = x;
   1241     for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) {
   1242       if (!Reserved[*SR])
   1243         continue;
   1244       TmpSup[R] = false;
   1245       break;
   1246     }
   1247   }
   1248   DEBUG(dbgs() << "TmpSup:  "; dump_registers(TmpSup, *TRI); dbgs() << "\n");
   1249 
   1250   // (4) Include all super-registers found in (3) into SRegs.
   1251   SRegs |= TmpSup;
   1252   DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
   1253 
   1254   // (5) For each register R in SRegs, if any super-register of R is in SRegs,
   1255   // remove R from SRegs.
   1256   for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
   1257     unsigned R = x;
   1258     for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR) {
   1259       if (!SRegs[*SR])
   1260         continue;
   1261       SRegs[R] = false;
   1262       break;
   1263     }
   1264   }
   1265   DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
   1266 
   1267   // Now, for each register that has a fixed stack slot, create the stack
   1268   // object for it.
   1269   CSI.clear();
   1270 
   1271   typedef TargetFrameLowering::SpillSlot SpillSlot;
   1272   unsigned NumFixed;
   1273   int MinOffset = 0;  // CS offsets are negative.
   1274   const SpillSlot *FixedSlots = getCalleeSavedSpillSlots(NumFixed);
   1275   for (const SpillSlot *S = FixedSlots; S != FixedSlots+NumFixed; ++S) {
   1276     if (!SRegs[S->Reg])
   1277       continue;
   1278     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg);
   1279     int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), S->Offset);
   1280     MinOffset = std::min(MinOffset, S->Offset);
   1281     CSI.push_back(CalleeSavedInfo(S->Reg, FI));
   1282     SRegs[S->Reg] = false;
   1283   }
   1284 
   1285   // There can be some registers that don't have fixed slots. For example,
   1286   // we need to store R0-R3 in functions with exception handling. For each
   1287   // such register, create a non-fixed stack object.
   1288   for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
   1289     unsigned R = x;
   1290     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R);
   1291     int Off = MinOffset - RC->getSize();
   1292     unsigned Align = std::min(RC->getAlignment(), getStackAlignment());
   1293     assert(isPowerOf2_32(Align));
   1294     Off &= -Align;
   1295     int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), Off);
   1296     MinOffset = std::min(MinOffset, Off);
   1297     CSI.push_back(CalleeSavedInfo(R, FI));
   1298     SRegs[R] = false;
   1299   }
   1300 
   1301   DEBUG({
   1302     dbgs() << "CS information: {";
   1303     for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
   1304       int FI = CSI[i].getFrameIdx();
   1305       int Off = MFI->getObjectOffset(FI);
   1306       dbgs() << ' ' << PrintReg(CSI[i].getReg(), TRI) << ":fi#" << FI << ":sp";
   1307       if (Off >= 0)
   1308         dbgs() << '+';
   1309       dbgs() << Off;
   1310     }
   1311     dbgs() << " }\n";
   1312   });
   1313 
   1314 #ifndef NDEBUG
   1315   // Verify that all registers were handled.
   1316   bool MissedReg = false;
   1317   for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
   1318     unsigned R = x;
   1319     dbgs() << PrintReg(R, TRI) << ' ';
   1320     MissedReg = true;
   1321   }
   1322   if (MissedReg)
   1323     llvm_unreachable("...there are unhandled callee-saved registers!");
   1324 #endif
   1325 
   1326   return true;
   1327 }
   1328 
   1329 
   1330 void HexagonFrameLowering::expandAlloca(MachineInstr *AI,
   1331       const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const {
   1332   MachineBasicBlock &MB = *AI->getParent();
   1333   DebugLoc DL = AI->getDebugLoc();
   1334   unsigned A = AI->getOperand(2).getImm();
   1335 
   1336   // Have
   1337   //    Rd  = alloca Rs, #A
   1338   //
   1339   // If Rs and Rd are different registers, use this sequence:
   1340   //    Rd  = sub(r29, Rs)
   1341   //    r29 = sub(r29, Rs)
   1342   //    Rd  = and(Rd, #-A)    ; if necessary
   1343   //    r29 = and(r29, #-A)   ; if necessary
   1344   //    Rd  = add(Rd, #CF)    ; CF size aligned to at most A
   1345   // otherwise, do
   1346   //    Rd  = sub(r29, Rs)
   1347   //    Rd  = and(Rd, #-A)    ; if necessary
   1348   //    r29 = Rd
   1349   //    Rd  = add(Rd, #CF)    ; CF size aligned to at most A
   1350 
   1351   MachineOperand &RdOp = AI->getOperand(0);
   1352   MachineOperand &RsOp = AI->getOperand(1);
   1353   unsigned Rd = RdOp.getReg(), Rs = RsOp.getReg();
   1354 
   1355   // Rd = sub(r29, Rs)
   1356   BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), Rd)
   1357       .addReg(SP)
   1358       .addReg(Rs);
   1359   if (Rs != Rd) {
   1360     // r29 = sub(r29, Rs)
   1361     BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), SP)
   1362         .addReg(SP)
   1363         .addReg(Rs);
   1364   }
   1365   if (A > 8) {
   1366     // Rd  = and(Rd, #-A)
   1367     BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), Rd)
   1368         .addReg(Rd)
   1369         .addImm(-int64_t(A));
   1370     if (Rs != Rd)
   1371       BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), SP)
   1372           .addReg(SP)
   1373           .addImm(-int64_t(A));
   1374   }
   1375   if (Rs == Rd) {
   1376     // r29 = Rd
   1377     BuildMI(MB, AI, DL, HII.get(TargetOpcode::COPY), SP)
   1378         .addReg(Rd);
   1379   }
   1380   if (CF > 0) {
   1381     // Rd = add(Rd, #CF)
   1382     BuildMI(MB, AI, DL, HII.get(Hexagon::A2_addi), Rd)
   1383         .addReg(Rd)
   1384         .addImm(CF);
   1385   }
   1386 }
   1387 
   1388 
   1389 bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const {
   1390   const MachineFrameInfo *MFI = MF.getFrameInfo();
   1391   if (!MFI->hasVarSizedObjects())
   1392     return false;
   1393   unsigned MaxA = MFI->getMaxAlignment();
   1394   if (MaxA <= getStackAlignment())
   1395     return false;
   1396   return true;
   1397 }
   1398 
   1399 
   1400 const MachineInstr *HexagonFrameLowering::getAlignaInstr(
   1401       const MachineFunction &MF) const {
   1402   for (auto &B : MF)
   1403     for (auto &I : B)
   1404       if (I.getOpcode() == Hexagon::ALIGNA)
   1405         return &I;
   1406   return nullptr;
   1407 }
   1408 
   1409 
   1410 // FIXME: Use Function::optForSize().
   1411 inline static bool isOptSize(const MachineFunction &MF) {
   1412   AttributeSet AF = MF.getFunction()->getAttributes();
   1413   return AF.hasAttribute(AttributeSet::FunctionIndex,
   1414                          Attribute::OptimizeForSize);
   1415 }
   1416 
   1417 inline static bool isMinSize(const MachineFunction &MF) {
   1418   return MF.getFunction()->optForMinSize();
   1419 }
   1420 
   1421 
   1422 /// Determine whether the callee-saved register saves and restores should
   1423 /// be generated via inline code. If this function returns "true", inline
   1424 /// code will be generated. If this function returns "false", additional
   1425 /// checks are performed, which may still lead to the inline code.
   1426 bool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF,
   1427       const CSIVect &CSI) const {
   1428   if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
   1429     return true;
   1430   if (!isOptSize(MF) && !isMinSize(MF))
   1431     if (MF.getTarget().getOptLevel() > CodeGenOpt::Default)
   1432       return true;
   1433 
   1434   // Check if CSI only has double registers, and if the registers form
   1435   // a contiguous block starting from D8.
   1436   BitVector Regs(Hexagon::NUM_TARGET_REGS);
   1437   for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
   1438     unsigned R = CSI[i].getReg();
   1439     if (!Hexagon::DoubleRegsRegClass.contains(R))
   1440       return true;
   1441     Regs[R] = true;
   1442   }
   1443   int F = Regs.find_first();
   1444   if (F != Hexagon::D8)
   1445     return true;
   1446   while (F >= 0) {
   1447     int N = Regs.find_next(F);
   1448     if (N >= 0 && N != F+1)
   1449       return true;
   1450     F = N;
   1451   }
   1452 
   1453   return false;
   1454 }
   1455 
   1456 
   1457 bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF,
   1458       const CSIVect &CSI) const {
   1459   if (shouldInlineCSR(MF, CSI))
   1460     return false;
   1461   unsigned NumCSI = CSI.size();
   1462   if (NumCSI <= 1)
   1463     return false;
   1464 
   1465   unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs
   1466                                      : SpillFuncThreshold;
   1467   return Threshold < NumCSI;
   1468 }
   1469 
   1470 
   1471 bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF,
   1472       const CSIVect &CSI) const {
   1473   if (shouldInlineCSR(MF, CSI))
   1474     return false;
   1475   unsigned NumCSI = CSI.size();
   1476   unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs-1
   1477                                      : SpillFuncThreshold;
   1478   return Threshold < NumCSI;
   1479 }
   1480