Home | History | Annotate | Download | only in Mips
      1 //===- MipsDelaySlotFiller.cpp - Mips Delay Slot Filler -------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // Simple pass to fill delay slots with useful instructions.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "MCTargetDesc/MipsMCNaCl.h"
     15 #include "Mips.h"
     16 #include "MipsInstrInfo.h"
     17 #include "MipsRegisterInfo.h"
     18 #include "MipsSubtarget.h"
     19 #include "llvm/ADT/BitVector.h"
     20 #include "llvm/ADT/DenseMap.h"
     21 #include "llvm/ADT/PointerUnion.h"
     22 #include "llvm/ADT/SmallPtrSet.h"
     23 #include "llvm/ADT/SmallVector.h"
     24 #include "llvm/ADT/Statistic.h"
     25 #include "llvm/ADT/StringRef.h"
     26 #include "llvm/Analysis/AliasAnalysis.h"
     27 #include "llvm/Analysis/ValueTracking.h"
     28 #include "llvm/CodeGen/MachineBasicBlock.h"
     29 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
     30 #include "llvm/CodeGen/MachineFunction.h"
     31 #include "llvm/CodeGen/MachineFunctionPass.h"
     32 #include "llvm/CodeGen/MachineInstr.h"
     33 #include "llvm/CodeGen/MachineInstrBuilder.h"
     34 #include "llvm/CodeGen/MachineOperand.h"
     35 #include "llvm/CodeGen/MachineRegisterInfo.h"
     36 #include "llvm/CodeGen/PseudoSourceValue.h"
     37 #include "llvm/CodeGen/TargetRegisterInfo.h"
     38 #include "llvm/CodeGen/TargetSubtargetInfo.h"
     39 #include "llvm/MC/MCInstrDesc.h"
     40 #include "llvm/MC/MCRegisterInfo.h"
     41 #include "llvm/Support/Casting.h"
     42 #include "llvm/Support/CodeGen.h"
     43 #include "llvm/Support/CommandLine.h"
     44 #include "llvm/Support/ErrorHandling.h"
     45 #include "llvm/Target/TargetMachine.h"
     46 #include <algorithm>
     47 #include <cassert>
     48 #include <iterator>
     49 #include <memory>
     50 #include <utility>
     51 
     52 using namespace llvm;
     53 
     54 #define DEBUG_TYPE "mips-delay-slot-filler"
     55 
     56 STATISTIC(FilledSlots, "Number of delay slots filled");
     57 STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that"
     58                        " are not NOP.");
     59 
     60 static cl::opt<bool> DisableDelaySlotFiller(
     61   "disable-mips-delay-filler",
     62   cl::init(false),
     63   cl::desc("Fill all delay slots with NOPs."),
     64   cl::Hidden);
     65 
     66 static cl::opt<bool> DisableForwardSearch(
     67   "disable-mips-df-forward-search",
     68   cl::init(true),
     69   cl::desc("Disallow MIPS delay filler to search forward."),
     70   cl::Hidden);
     71 
     72 static cl::opt<bool> DisableSuccBBSearch(
     73   "disable-mips-df-succbb-search",
     74   cl::init(true),
     75   cl::desc("Disallow MIPS delay filler to search successor basic blocks."),
     76   cl::Hidden);
     77 
     78 static cl::opt<bool> DisableBackwardSearch(
     79   "disable-mips-df-backward-search",
     80   cl::init(false),
     81   cl::desc("Disallow MIPS delay filler to search backward."),
     82   cl::Hidden);
     83 
     84 enum CompactBranchPolicy {
     85   CB_Never,   ///< The policy 'never' may in some circumstances or for some
     86               ///< ISAs not be absolutely adhered to.
     87   CB_Optimal, ///< Optimal is the default and will produce compact branches
     88               ///< when delay slots cannot be filled.
     89   CB_Always   ///< 'always' may in some circumstances may not be
     90               ///< absolutely adhered to there may not be a corresponding
     91               ///< compact form of a branch.
     92 };
     93 
     94 static cl::opt<CompactBranchPolicy> MipsCompactBranchPolicy(
     95   "mips-compact-branches",cl::Optional,
     96   cl::init(CB_Optimal),
     97   cl::desc("MIPS Specific: Compact branch policy."),
     98   cl::values(
     99     clEnumValN(CB_Never, "never", "Do not use compact branches if possible."),
    100     clEnumValN(CB_Optimal, "optimal", "Use compact branches where appropiate (default)."),
    101     clEnumValN(CB_Always, "always", "Always use compact branches if possible.")
    102   )
    103 );
    104 
    105 namespace {
    106 
    107   using Iter = MachineBasicBlock::iterator;
    108   using ReverseIter = MachineBasicBlock::reverse_iterator;
    109   using BB2BrMap = SmallDenseMap<MachineBasicBlock *, MachineInstr *, 2>;
    110 
    111   class RegDefsUses {
    112   public:
    113     RegDefsUses(const TargetRegisterInfo &TRI);
    114 
    115     void init(const MachineInstr &MI);
    116 
    117     /// This function sets all caller-saved registers in Defs.
    118     void setCallerSaved(const MachineInstr &MI);
    119 
    120     /// This function sets all unallocatable registers in Defs.
    121     void setUnallocatableRegs(const MachineFunction &MF);
    122 
    123     /// Set bits in Uses corresponding to MBB's live-out registers except for
    124     /// the registers that are live-in to SuccBB.
    125     void addLiveOut(const MachineBasicBlock &MBB,
    126                     const MachineBasicBlock &SuccBB);
    127 
    128     bool update(const MachineInstr &MI, unsigned Begin, unsigned End);
    129 
    130   private:
    131     bool checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses, unsigned Reg,
    132                           bool IsDef) const;
    133 
    134     /// Returns true if Reg or its alias is in RegSet.
    135     bool isRegInSet(const BitVector &RegSet, unsigned Reg) const;
    136 
    137     const TargetRegisterInfo &TRI;
    138     BitVector Defs, Uses;
    139   };
    140 
    141   /// Base class for inspecting loads and stores.
    142   class InspectMemInstr {
    143   public:
    144     InspectMemInstr(bool ForbidMemInstr_) : ForbidMemInstr(ForbidMemInstr_) {}
    145     virtual ~InspectMemInstr() = default;
    146 
    147     /// Return true if MI cannot be moved to delay slot.
    148     bool hasHazard(const MachineInstr &MI);
    149 
    150   protected:
    151     /// Flags indicating whether loads or stores have been seen.
    152     bool OrigSeenLoad = false;
    153     bool OrigSeenStore = false;
    154     bool SeenLoad = false;
    155     bool SeenStore = false;
    156 
    157     /// Memory instructions are not allowed to move to delay slot if this flag
    158     /// is true.
    159     bool ForbidMemInstr;
    160 
    161   private:
    162     virtual bool hasHazard_(const MachineInstr &MI) = 0;
    163   };
    164 
    165   /// This subclass rejects any memory instructions.
    166   class NoMemInstr : public InspectMemInstr {
    167   public:
    168     NoMemInstr() : InspectMemInstr(true) {}
    169 
    170   private:
    171     bool hasHazard_(const MachineInstr &MI) override { return true; }
    172   };
    173 
    174   /// This subclass accepts loads from stacks and constant loads.
    175   class LoadFromStackOrConst : public InspectMemInstr {
    176   public:
    177     LoadFromStackOrConst() : InspectMemInstr(false) {}
    178 
    179   private:
    180     bool hasHazard_(const MachineInstr &MI) override;
    181   };
    182 
    183   /// This subclass uses memory dependence information to determine whether a
    184   /// memory instruction can be moved to a delay slot.
    185   class MemDefsUses : public InspectMemInstr {
    186   public:
    187     MemDefsUses(const DataLayout &DL, const MachineFrameInfo *MFI);
    188 
    189   private:
    190     using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>;
    191 
    192     bool hasHazard_(const MachineInstr &MI) override;
    193 
    194     /// Update Defs and Uses. Return true if there exist dependences that
    195     /// disqualify the delay slot candidate between V and values in Uses and
    196     /// Defs.
    197     bool updateDefsUses(ValueType V, bool MayStore);
    198 
    199     /// Get the list of underlying objects of MI's memory operand.
    200     bool getUnderlyingObjects(const MachineInstr &MI,
    201                               SmallVectorImpl<ValueType> &Objects) const;
    202 
    203     const MachineFrameInfo *MFI;
    204     SmallPtrSet<ValueType, 4> Uses, Defs;
    205     const DataLayout &DL;
    206 
    207     /// Flags indicating whether loads or stores with no underlying objects have
    208     /// been seen.
    209     bool SeenNoObjLoad = false;
    210     bool SeenNoObjStore = false;
    211   };
    212 
    213   class MipsDelaySlotFiller : public MachineFunctionPass {
    214   public:
    215     MipsDelaySlotFiller() : MachineFunctionPass(ID) {
    216       initializeMipsDelaySlotFillerPass(*PassRegistry::getPassRegistry());
    217     }
    218 
    219     StringRef getPassName() const override { return "Mips Delay Slot Filler"; }
    220 
    221     bool runOnMachineFunction(MachineFunction &F) override {
    222       TM = &F.getTarget();
    223       bool Changed = false;
    224       for (MachineFunction::iterator FI = F.begin(), FE = F.end();
    225            FI != FE; ++FI)
    226         Changed |= runOnMachineBasicBlock(*FI);
    227 
    228       // This pass invalidates liveness information when it reorders
    229       // instructions to fill delay slot. Without this, -verify-machineinstrs
    230       // will fail.
    231       if (Changed)
    232         F.getRegInfo().invalidateLiveness();
    233 
    234       return Changed;
    235     }
    236 
    237     MachineFunctionProperties getRequiredProperties() const override {
    238       return MachineFunctionProperties().set(
    239           MachineFunctionProperties::Property::NoVRegs);
    240     }
    241 
    242     void getAnalysisUsage(AnalysisUsage &AU) const override {
    243       AU.addRequired<MachineBranchProbabilityInfo>();
    244       MachineFunctionPass::getAnalysisUsage(AU);
    245     }
    246 
    247     static char ID;
    248 
    249   private:
    250     bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
    251 
    252     Iter replaceWithCompactBranch(MachineBasicBlock &MBB, Iter Branch,
    253                                   const DebugLoc &DL);
    254 
    255     /// This function checks if it is valid to move Candidate to the delay slot
    256     /// and returns true if it isn't. It also updates memory and register
    257     /// dependence information.
    258     bool delayHasHazard(const MachineInstr &Candidate, RegDefsUses &RegDU,
    259                         InspectMemInstr &IM) const;
    260 
    261     /// This function searches range [Begin, End) for an instruction that can be
    262     /// moved to the delay slot. Returns true on success.
    263     template<typename IterTy>
    264     bool searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
    265                      RegDefsUses &RegDU, InspectMemInstr &IM, Iter Slot,
    266                      IterTy &Filler) const;
    267 
    268     /// This function searches in the backward direction for an instruction that
    269     /// can be moved to the delay slot. Returns true on success.
    270     bool searchBackward(MachineBasicBlock &MBB, MachineInstr &Slot) const;
    271 
    272     /// This function searches MBB in the forward direction for an instruction
    273     /// that can be moved to the delay slot. Returns true on success.
    274     bool searchForward(MachineBasicBlock &MBB, Iter Slot) const;
    275 
    276     /// This function searches one of MBB's successor blocks for an instruction
    277     /// that can be moved to the delay slot and inserts clones of the
    278     /// instruction into the successor's predecessor blocks.
    279     bool searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const;
    280 
    281     /// Pick a successor block of MBB. Return NULL if MBB doesn't have a
    282     /// successor block that is not a landing pad.
    283     MachineBasicBlock *selectSuccBB(MachineBasicBlock &B) const;
    284 
    285     /// This function analyzes MBB and returns an instruction with an unoccupied
    286     /// slot that branches to Dst.
    287     std::pair<MipsInstrInfo::BranchType, MachineInstr *>
    288     getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const;
    289 
    290     /// Examine Pred and see if it is possible to insert an instruction into
    291     /// one of its branches delay slot or its end.
    292     bool examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ,
    293                      RegDefsUses &RegDU, bool &HasMultipleSuccs,
    294                      BB2BrMap &BrMap) const;
    295 
    296     bool terminateSearch(const MachineInstr &Candidate) const;
    297 
    298     const TargetMachine *TM = nullptr;
    299   };
    300 
    301 } // end anonymous namespace
    302 
    303 char MipsDelaySlotFiller::ID = 0;
    304 
    305 static bool hasUnoccupiedSlot(const MachineInstr *MI) {
    306   return MI->hasDelaySlot() && !MI->isBundledWithSucc();
    307 }
    308 
    309 INITIALIZE_PASS(MipsDelaySlotFiller, DEBUG_TYPE,
    310                 "Fill delay slot for MIPS", false, false)
    311 
    312 /// This function inserts clones of Filler into predecessor blocks.
    313 static void insertDelayFiller(Iter Filler, const BB2BrMap &BrMap) {
    314   MachineFunction *MF = Filler->getParent()->getParent();
    315 
    316   for (BB2BrMap::const_iterator I = BrMap.begin(); I != BrMap.end(); ++I) {
    317     if (I->second) {
    318       MIBundleBuilder(I->second).append(MF->CloneMachineInstr(&*Filler));
    319       ++UsefulSlots;
    320     } else {
    321       I->first->insert(I->first->end(), MF->CloneMachineInstr(&*Filler));
    322     }
    323   }
    324 }
    325 
    326 /// This function adds registers Filler defines to MBB's live-in register list.
    327 static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB) {
    328   for (unsigned I = 0, E = Filler->getNumOperands(); I != E; ++I) {
    329     const MachineOperand &MO = Filler->getOperand(I);
    330     unsigned R;
    331 
    332     if (!MO.isReg() || !MO.isDef() || !(R = MO.getReg()))
    333       continue;
    334 
    335 #ifndef NDEBUG
    336     const MachineFunction &MF = *MBB.getParent();
    337     assert(MF.getSubtarget().getRegisterInfo()->getAllocatableSet(MF).test(R) &&
    338            "Shouldn't move an instruction with unallocatable registers across "
    339            "basic block boundaries.");
    340 #endif
    341 
    342     if (!MBB.isLiveIn(R))
    343       MBB.addLiveIn(R);
    344   }
    345 }
    346 
    347 RegDefsUses::RegDefsUses(const TargetRegisterInfo &TRI)
    348     : TRI(TRI), Defs(TRI.getNumRegs(), false), Uses(TRI.getNumRegs(), false) {}
    349 
    350 void RegDefsUses::init(const MachineInstr &MI) {
    351   // Add all register operands which are explicit and non-variadic.
    352   update(MI, 0, MI.getDesc().getNumOperands());
    353 
    354   // If MI is a call, add RA to Defs to prevent users of RA from going into
    355   // delay slot.
    356   if (MI.isCall())
    357     Defs.set(Mips::RA);
    358 
    359   // Add all implicit register operands of branch instructions except
    360   // register AT.
    361   if (MI.isBranch()) {
    362     update(MI, MI.getDesc().getNumOperands(), MI.getNumOperands());
    363     Defs.reset(Mips::AT);
    364   }
    365 }
    366 
    367 void RegDefsUses::setCallerSaved(const MachineInstr &MI) {
    368   assert(MI.isCall());
    369 
    370   // Add RA/RA_64 to Defs to prevent users of RA/RA_64 from going into
    371   // the delay slot. The reason is that RA/RA_64 must not be changed
    372   // in the delay slot so that the callee can return to the caller.
    373   if (MI.definesRegister(Mips::RA) || MI.definesRegister(Mips::RA_64)) {
    374     Defs.set(Mips::RA);
    375     Defs.set(Mips::RA_64);
    376   }
    377 
    378   // If MI is a call, add all caller-saved registers to Defs.
    379   BitVector CallerSavedRegs(TRI.getNumRegs(), true);
    380 
    381   CallerSavedRegs.reset(Mips::ZERO);
    382   CallerSavedRegs.reset(Mips::ZERO_64);
    383 
    384   for (const MCPhysReg *R = TRI.getCalleeSavedRegs(MI.getParent()->getParent());
    385        *R; ++R)
    386     for (MCRegAliasIterator AI(*R, &TRI, true); AI.isValid(); ++AI)
    387       CallerSavedRegs.reset(*AI);
    388 
    389   Defs |= CallerSavedRegs;
    390 }
    391 
    392 void RegDefsUses::setUnallocatableRegs(const MachineFunction &MF) {
    393   BitVector AllocSet = TRI.getAllocatableSet(MF);
    394 
    395   for (unsigned R : AllocSet.set_bits())
    396     for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI)
    397       AllocSet.set(*AI);
    398 
    399   AllocSet.set(Mips::ZERO);
    400   AllocSet.set(Mips::ZERO_64);
    401 
    402   Defs |= AllocSet.flip();
    403 }
    404 
    405 void RegDefsUses::addLiveOut(const MachineBasicBlock &MBB,
    406                              const MachineBasicBlock &SuccBB) {
    407   for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
    408        SE = MBB.succ_end(); SI != SE; ++SI)
    409     if (*SI != &SuccBB)
    410       for (const auto &LI : (*SI)->liveins())
    411         Uses.set(LI.PhysReg);
    412 }
    413 
    414 bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) {
    415   BitVector NewDefs(TRI.getNumRegs()), NewUses(TRI.getNumRegs());
    416   bool HasHazard = false;
    417 
    418   for (unsigned I = Begin; I != End; ++I) {
    419     const MachineOperand &MO = MI.getOperand(I);
    420 
    421     if (MO.isReg() && MO.getReg())
    422       HasHazard |= checkRegDefsUses(NewDefs, NewUses, MO.getReg(), MO.isDef());
    423   }
    424 
    425   Defs |= NewDefs;
    426   Uses |= NewUses;
    427 
    428   return HasHazard;
    429 }
    430 
    431 bool RegDefsUses::checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses,
    432                                    unsigned Reg, bool IsDef) const {
    433   if (IsDef) {
    434     NewDefs.set(Reg);
    435     // check whether Reg has already been defined or used.
    436     return (isRegInSet(Defs, Reg) || isRegInSet(Uses, Reg));
    437   }
    438 
    439   NewUses.set(Reg);
    440   // check whether Reg has already been defined.
    441   return isRegInSet(Defs, Reg);
    442 }
    443 
    444 bool RegDefsUses::isRegInSet(const BitVector &RegSet, unsigned Reg) const {
    445   // Check Reg and all aliased Registers.
    446   for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
    447     if (RegSet.test(*AI))
    448       return true;
    449   return false;
    450 }
    451 
    452 bool InspectMemInstr::hasHazard(const MachineInstr &MI) {
    453   if (!MI.mayStore() && !MI.mayLoad())
    454     return false;
    455 
    456   if (ForbidMemInstr)
    457     return true;
    458 
    459   OrigSeenLoad = SeenLoad;
    460   OrigSeenStore = SeenStore;
    461   SeenLoad |= MI.mayLoad();
    462   SeenStore |= MI.mayStore();
    463 
    464   // If MI is an ordered or volatile memory reference, disallow moving
    465   // subsequent loads and stores to delay slot.
    466   if (MI.hasOrderedMemoryRef() && (OrigSeenLoad || OrigSeenStore)) {
    467     ForbidMemInstr = true;
    468     return true;
    469   }
    470 
    471   return hasHazard_(MI);
    472 }
    473 
    474 bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
    475   if (MI.mayStore())
    476     return true;
    477 
    478   if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getPseudoValue())
    479     return true;
    480 
    481   if (const PseudoSourceValue *PSV =
    482       (*MI.memoperands_begin())->getPseudoValue()) {
    483     if (isa<FixedStackPseudoSourceValue>(PSV))
    484       return false;
    485     return !PSV->isConstant(nullptr) && !PSV->isStack();
    486   }
    487 
    488   return true;
    489 }
    490 
    491 MemDefsUses::MemDefsUses(const DataLayout &DL, const MachineFrameInfo *MFI_)
    492     : InspectMemInstr(false), MFI(MFI_), DL(DL) {}
    493 
    494 bool MemDefsUses::hasHazard_(const MachineInstr &MI) {
    495   bool HasHazard = false;
    496   SmallVector<ValueType, 4> Objs;
    497 
    498   // Check underlying object list.
    499   if (getUnderlyingObjects(MI, Objs)) {
    500     for (SmallVectorImpl<ValueType>::const_iterator I = Objs.begin();
    501          I != Objs.end(); ++I)
    502       HasHazard |= updateDefsUses(*I, MI.mayStore());
    503 
    504     return HasHazard;
    505   }
    506 
    507   // No underlying objects found.
    508   HasHazard = MI.mayStore() && (OrigSeenLoad || OrigSeenStore);
    509   HasHazard |= MI.mayLoad() || OrigSeenStore;
    510 
    511   SeenNoObjLoad |= MI.mayLoad();
    512   SeenNoObjStore |= MI.mayStore();
    513 
    514   return HasHazard;
    515 }
    516 
    517 bool MemDefsUses::updateDefsUses(ValueType V, bool MayStore) {
    518   if (MayStore)
    519     return !Defs.insert(V).second || Uses.count(V) || SeenNoObjStore ||
    520            SeenNoObjLoad;
    521 
    522   Uses.insert(V);
    523   return Defs.count(V) || SeenNoObjStore;
    524 }
    525 
    526 bool MemDefsUses::
    527 getUnderlyingObjects(const MachineInstr &MI,
    528                      SmallVectorImpl<ValueType> &Objects) const {
    529   if (!MI.hasOneMemOperand() ||
    530       (!(*MI.memoperands_begin())->getValue() &&
    531        !(*MI.memoperands_begin())->getPseudoValue()))
    532     return false;
    533 
    534   if (const PseudoSourceValue *PSV =
    535       (*MI.memoperands_begin())->getPseudoValue()) {
    536     if (!PSV->isAliased(MFI))
    537       return false;
    538     Objects.push_back(PSV);
    539     return true;
    540   }
    541 
    542   const Value *V = (*MI.memoperands_begin())->getValue();
    543 
    544   SmallVector<Value *, 4> Objs;
    545   GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);
    546 
    547   for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), E = Objs.end();
    548        I != E; ++I) {
    549     if (!isIdentifiedObject(V))
    550       return false;
    551 
    552     Objects.push_back(*I);
    553   }
    554 
    555   return true;
    556 }
    557 
    558 // Replace Branch with the compact branch instruction.
    559 Iter MipsDelaySlotFiller::replaceWithCompactBranch(MachineBasicBlock &MBB,
    560                                                    Iter Branch,
    561                                                    const DebugLoc &DL) {
    562   const MipsSubtarget &STI = MBB.getParent()->getSubtarget<MipsSubtarget>();
    563   const MipsInstrInfo *TII = STI.getInstrInfo();
    564 
    565   unsigned NewOpcode = TII->getEquivalentCompactForm(Branch);
    566   Branch = TII->genInstrWithNewOpc(NewOpcode, Branch);
    567 
    568   std::next(Branch)->eraseFromParent();
    569   return Branch;
    570 }
    571 
    572 // For given opcode returns opcode of corresponding instruction with short
    573 // delay slot.
    574 // For the pseudo TAILCALL*_MM instructions return the short delay slot
    575 // form. Unfortunately, TAILCALL<->b16 is denied as b16 has a limited range
    576 // that is too short to make use of for tail calls.
    577 static int getEquivalentCallShort(int Opcode) {
    578   switch (Opcode) {
    579   case Mips::BGEZAL:
    580     return Mips::BGEZALS_MM;
    581   case Mips::BLTZAL:
    582     return Mips::BLTZALS_MM;
    583   case Mips::JAL:
    584   case Mips::JAL_MM:
    585     return Mips::JALS_MM;
    586   case Mips::JALR:
    587     return Mips::JALRS_MM;
    588   case Mips::JALR16_MM:
    589     return Mips::JALRS16_MM;
    590   case Mips::TAILCALL_MM:
    591     llvm_unreachable("Attempting to shorten the TAILCALL_MM pseudo!");
    592   case Mips::TAILCALLREG:
    593     return Mips::JR16_MM;
    594   default:
    595     llvm_unreachable("Unexpected call instruction for microMIPS.");
    596   }
    597 }
    598 
    599 /// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
    600 /// We assume there is only one delay slot per delayed instruction.
    601 bool MipsDelaySlotFiller::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
    602   bool Changed = false;
    603   const MipsSubtarget &STI = MBB.getParent()->getSubtarget<MipsSubtarget>();
    604   bool InMicroMipsMode = STI.inMicroMipsMode();
    605   const MipsInstrInfo *TII = STI.getInstrInfo();
    606 
    607   for (Iter I = MBB.begin(); I != MBB.end(); ++I) {
    608     if (!hasUnoccupiedSlot(&*I))
    609       continue;
    610 
    611     // Delay slot filling is disabled at -O0, or in microMIPS32R6.
    612     if (!DisableDelaySlotFiller && (TM->getOptLevel() != CodeGenOpt::None) &&
    613         !(InMicroMipsMode && STI.hasMips32r6())) {
    614 
    615       bool Filled = false;
    616 
    617       if (MipsCompactBranchPolicy.getValue() != CB_Always ||
    618            !TII->getEquivalentCompactForm(I)) {
    619         if (searchBackward(MBB, *I)) {
    620           Filled = true;
    621         } else if (I->isTerminator()) {
    622           if (searchSuccBBs(MBB, I)) {
    623             Filled = true;
    624           }
    625         } else if (searchForward(MBB, I)) {
    626           Filled = true;
    627         }
    628       }
    629 
    630       if (Filled) {
    631         // Get instruction with delay slot.
    632         MachineBasicBlock::instr_iterator DSI = I.getInstrIterator();
    633 
    634         if (InMicroMipsMode && TII->getInstSizeInBytes(*std::next(DSI)) == 2 &&
    635             DSI->isCall()) {
    636           // If instruction in delay slot is 16b change opcode to
    637           // corresponding instruction with short delay slot.
    638 
    639           // TODO: Implement an instruction mapping table of 16bit opcodes to
    640           // 32bit opcodes so that an instruction can be expanded. This would
    641           // save 16 bits as a TAILCALL_MM pseudo requires a fullsized nop.
    642           // TODO: Permit b16 when branching backwards to the same function
    643           // if it is in range.
    644           DSI->setDesc(TII->get(getEquivalentCallShort(DSI->getOpcode())));
    645         }
    646         ++FilledSlots;
    647         Changed = true;
    648         continue;
    649       }
    650     }
    651 
    652     // For microMIPS if instruction is BEQ or BNE with one ZERO register, then
    653     // instead of adding NOP replace this instruction with the corresponding
    654     // compact branch instruction, i.e. BEQZC or BNEZC. Additionally
    655     // PseudoReturn and PseudoIndirectBranch are expanded to JR_MM, so they can
    656     // be replaced with JRC16_MM.
    657 
    658     // For MIPSR6 attempt to produce the corresponding compact (no delay slot)
    659     // form of the CTI. For indirect jumps this will not require inserting a
    660     // NOP and for branches will hopefully avoid requiring a NOP.
    661     if ((InMicroMipsMode ||
    662          (STI.hasMips32r6() && MipsCompactBranchPolicy != CB_Never)) &&
    663         TII->getEquivalentCompactForm(I)) {
    664       I = replaceWithCompactBranch(MBB, I, I->getDebugLoc());
    665       Changed = true;
    666       continue;
    667     }
    668 
    669     // Bundle the NOP to the instruction with the delay slot.
    670     BuildMI(MBB, std::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
    671     MIBundleBuilder(MBB, I, std::next(I, 2));
    672     ++FilledSlots;
    673     Changed = true;
    674   }
    675 
    676   return Changed;
    677 }
    678 
    679 template <typename IterTy>
    680 bool MipsDelaySlotFiller::searchRange(MachineBasicBlock &MBB, IterTy Begin,
    681                                       IterTy End, RegDefsUses &RegDU,
    682                                       InspectMemInstr &IM, Iter Slot,
    683                                       IterTy &Filler) const {
    684   for (IterTy I = Begin; I != End;) {
    685     IterTy CurrI = I;
    686     ++I;
    687 
    688     // skip debug value
    689     if (CurrI->isDebugInstr())
    690       continue;
    691 
    692     if (terminateSearch(*CurrI))
    693       break;
    694 
    695     assert((!CurrI->isCall() && !CurrI->isReturn() && !CurrI->isBranch()) &&
    696            "Cannot put calls, returns or branches in delay slot.");
    697 
    698     if (CurrI->isKill()) {
    699       CurrI->eraseFromParent();
    700       continue;
    701     }
    702 
    703     if (delayHasHazard(*CurrI, RegDU, IM))
    704       continue;
    705 
    706     const MipsSubtarget &STI = MBB.getParent()->getSubtarget<MipsSubtarget>();
    707     if (STI.isTargetNaCl()) {
    708       // In NaCl, instructions that must be masked are forbidden in delay slots.
    709       // We only check for loads, stores and SP changes.  Calls, returns and
    710       // branches are not checked because non-NaCl targets never put them in
    711       // delay slots.
    712       unsigned AddrIdx;
    713       if ((isBasePlusOffsetMemoryAccess(CurrI->getOpcode(), &AddrIdx) &&
    714            baseRegNeedsLoadStoreMask(CurrI->getOperand(AddrIdx).getReg())) ||
    715           CurrI->modifiesRegister(Mips::SP, STI.getRegisterInfo()))
    716         continue;
    717     }
    718 
    719     bool InMicroMipsMode = STI.inMicroMipsMode();
    720     const MipsInstrInfo *TII = STI.getInstrInfo();
    721     unsigned Opcode = (*Slot).getOpcode();
    722     // This is complicated by the tail call optimization. For non-PIC code
    723     // there is only a 32bit sized unconditional branch which can be assumed
    724     // to be able to reach the target. b16 only has a range of +/- 1 KB.
    725     // It's entirely possible that the target function is reachable with b16
    726     // but we don't have enough information to make that decision.
    727      if (InMicroMipsMode && TII->getInstSizeInBytes(*CurrI) == 2 &&
    728         (Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch ||
    729          Opcode == Mips::PseudoReturn || Opcode == Mips::TAILCALL))
    730       continue;
    731      // Instructions LWP/SWP should not be in a delay slot as that
    732      // results in unpredictable behaviour
    733      if (InMicroMipsMode && (Opcode == Mips::LWP_MM || Opcode == Mips::SWP_MM))
    734        continue;
    735 
    736     Filler = CurrI;
    737     return true;
    738   }
    739 
    740   return false;
    741 }
    742 
    743 bool MipsDelaySlotFiller::searchBackward(MachineBasicBlock &MBB,
    744                                          MachineInstr &Slot) const {
    745   if (DisableBackwardSearch)
    746     return false;
    747 
    748   auto *Fn = MBB.getParent();
    749   RegDefsUses RegDU(*Fn->getSubtarget().getRegisterInfo());
    750   MemDefsUses MemDU(Fn->getDataLayout(), &Fn->getFrameInfo());
    751   ReverseIter Filler;
    752 
    753   RegDU.init(Slot);
    754 
    755   MachineBasicBlock::iterator SlotI = Slot;
    756   if (!searchRange(MBB, ++SlotI.getReverse(), MBB.rend(), RegDU, MemDU, Slot,
    757                    Filler))
    758     return false;
    759 
    760   MBB.splice(std::next(SlotI), &MBB, Filler.getReverse());
    761   MIBundleBuilder(MBB, SlotI, std::next(SlotI, 2));
    762   ++UsefulSlots;
    763   return true;
    764 }
    765 
    766 bool MipsDelaySlotFiller::searchForward(MachineBasicBlock &MBB,
    767                                         Iter Slot) const {
    768   // Can handle only calls.
    769   if (DisableForwardSearch || !Slot->isCall())
    770     return false;
    771 
    772   RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo());
    773   NoMemInstr NM;
    774   Iter Filler;
    775 
    776   RegDU.setCallerSaved(*Slot);
    777 
    778   if (!searchRange(MBB, std::next(Slot), MBB.end(), RegDU, NM, Slot, Filler))
    779     return false;
    780 
    781   MBB.splice(std::next(Slot), &MBB, Filler);
    782   MIBundleBuilder(MBB, Slot, std::next(Slot, 2));
    783   ++UsefulSlots;
    784   return true;
    785 }
    786 
    787 bool MipsDelaySlotFiller::searchSuccBBs(MachineBasicBlock &MBB,
    788                                         Iter Slot) const {
    789   if (DisableSuccBBSearch)
    790     return false;
    791 
    792   MachineBasicBlock *SuccBB = selectSuccBB(MBB);
    793 
    794   if (!SuccBB)
    795     return false;
    796 
    797   RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo());
    798   bool HasMultipleSuccs = false;
    799   BB2BrMap BrMap;
    800   std::unique_ptr<InspectMemInstr> IM;
    801   Iter Filler;
    802   auto *Fn = MBB.getParent();
    803 
    804   // Iterate over SuccBB's predecessor list.
    805   for (MachineBasicBlock::pred_iterator PI = SuccBB->pred_begin(),
    806        PE = SuccBB->pred_end(); PI != PE; ++PI)
    807     if (!examinePred(**PI, *SuccBB, RegDU, HasMultipleSuccs, BrMap))
    808       return false;
    809 
    810   // Do not allow moving instructions which have unallocatable register operands
    811   // across basic block boundaries.
    812   RegDU.setUnallocatableRegs(*Fn);
    813 
    814   // Only allow moving loads from stack or constants if any of the SuccBB's
    815   // predecessors have multiple successors.
    816   if (HasMultipleSuccs) {
    817     IM.reset(new LoadFromStackOrConst());
    818   } else {
    819     const MachineFrameInfo &MFI = Fn->getFrameInfo();
    820     IM.reset(new MemDefsUses(Fn->getDataLayout(), &MFI));
    821   }
    822 
    823   if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Slot,
    824                    Filler))
    825     return false;
    826 
    827   insertDelayFiller(Filler, BrMap);
    828   addLiveInRegs(Filler, *SuccBB);
    829   Filler->eraseFromParent();
    830 
    831   return true;
    832 }
    833 
    834 MachineBasicBlock *
    835 MipsDelaySlotFiller::selectSuccBB(MachineBasicBlock &B) const {
    836   if (B.succ_empty())
    837     return nullptr;
    838 
    839   // Select the successor with the larget edge weight.
    840   auto &Prob = getAnalysis<MachineBranchProbabilityInfo>();
    841   MachineBasicBlock *S = *std::max_element(
    842       B.succ_begin(), B.succ_end(),
    843       [&](const MachineBasicBlock *Dst0, const MachineBasicBlock *Dst1) {
    844         return Prob.getEdgeProbability(&B, Dst0) <
    845                Prob.getEdgeProbability(&B, Dst1);
    846       });
    847   return S->isEHPad() ? nullptr : S;
    848 }
    849 
    850 std::pair<MipsInstrInfo::BranchType, MachineInstr *>
    851 MipsDelaySlotFiller::getBranch(MachineBasicBlock &MBB,
    852                                const MachineBasicBlock &Dst) const {
    853   const MipsInstrInfo *TII =
    854       MBB.getParent()->getSubtarget<MipsSubtarget>().getInstrInfo();
    855   MachineBasicBlock *TrueBB = nullptr, *FalseBB = nullptr;
    856   SmallVector<MachineInstr*, 2> BranchInstrs;
    857   SmallVector<MachineOperand, 2> Cond;
    858 
    859   MipsInstrInfo::BranchType R =
    860       TII->analyzeBranch(MBB, TrueBB, FalseBB, Cond, false, BranchInstrs);
    861 
    862   if ((R == MipsInstrInfo::BT_None) || (R == MipsInstrInfo::BT_NoBranch))
    863     return std::make_pair(R, nullptr);
    864 
    865   if (R != MipsInstrInfo::BT_CondUncond) {
    866     if (!hasUnoccupiedSlot(BranchInstrs[0]))
    867       return std::make_pair(MipsInstrInfo::BT_None, nullptr);
    868 
    869     assert(((R != MipsInstrInfo::BT_Uncond) || (TrueBB == &Dst)));
    870 
    871     return std::make_pair(R, BranchInstrs[0]);
    872   }
    873 
    874   assert((TrueBB == &Dst) || (FalseBB == &Dst));
    875 
    876   // Examine the conditional branch. See if its slot is occupied.
    877   if (hasUnoccupiedSlot(BranchInstrs[0]))
    878     return std::make_pair(MipsInstrInfo::BT_Cond, BranchInstrs[0]);
    879 
    880   // If that fails, try the unconditional branch.
    881   if (hasUnoccupiedSlot(BranchInstrs[1]) && (FalseBB == &Dst))
    882     return std::make_pair(MipsInstrInfo::BT_Uncond, BranchInstrs[1]);
    883 
    884   return std::make_pair(MipsInstrInfo::BT_None, nullptr);
    885 }
    886 
    887 bool MipsDelaySlotFiller::examinePred(MachineBasicBlock &Pred,
    888                                       const MachineBasicBlock &Succ,
    889                                       RegDefsUses &RegDU,
    890                                       bool &HasMultipleSuccs,
    891                                       BB2BrMap &BrMap) const {
    892   std::pair<MipsInstrInfo::BranchType, MachineInstr *> P =
    893       getBranch(Pred, Succ);
    894 
    895   // Return if either getBranch wasn't able to analyze the branches or there
    896   // were no branches with unoccupied slots.
    897   if (P.first == MipsInstrInfo::BT_None)
    898     return false;
    899 
    900   if ((P.first != MipsInstrInfo::BT_Uncond) &&
    901       (P.first != MipsInstrInfo::BT_NoBranch)) {
    902     HasMultipleSuccs = true;
    903     RegDU.addLiveOut(Pred, Succ);
    904   }
    905 
    906   BrMap[&Pred] = P.second;
    907   return true;
    908 }
    909 
    910 bool MipsDelaySlotFiller::delayHasHazard(const MachineInstr &Candidate,
    911                                          RegDefsUses &RegDU,
    912                                          InspectMemInstr &IM) const {
    913   assert(!Candidate.isKill() &&
    914          "KILL instructions should have been eliminated at this point.");
    915 
    916   bool HasHazard = Candidate.isImplicitDef();
    917 
    918   HasHazard |= IM.hasHazard(Candidate);
    919   HasHazard |= RegDU.update(Candidate, 0, Candidate.getNumOperands());
    920 
    921   return HasHazard;
    922 }
    923 
    924 bool MipsDelaySlotFiller::terminateSearch(const MachineInstr &Candidate) const {
    925   return (Candidate.isTerminator() || Candidate.isCall() ||
    926           Candidate.isPosition() || Candidate.isInlineAsm() ||
    927           Candidate.hasUnmodeledSideEffects());
    928 }
    929 
    930 /// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
    931 /// slots in Mips MachineFunctions
    932 FunctionPass *llvm::createMipsDelaySlotFillerPass() { return new MipsDelaySlotFiller(); }
    933