1 //===--- AArch64StorePairSuppress.cpp --- Suppress store pair formation ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass identifies floating point stores that should not be combined into 11 // store pairs. Later we may do the same for floating point loads. 12 // ===---------------------------------------------------------------------===// 13 14 #include "AArch64InstrInfo.h" 15 #include "llvm/CodeGen/MachineFunction.h" 16 #include "llvm/CodeGen/MachineFunctionPass.h" 17 #include "llvm/CodeGen/MachineInstr.h" 18 #include "llvm/CodeGen/MachineTraceMetrics.h" 19 #include "llvm/CodeGen/TargetSchedule.h" 20 #include "llvm/Support/Debug.h" 21 #include "llvm/Support/raw_ostream.h" 22 #include "llvm/Target/TargetInstrInfo.h" 23 24 using namespace llvm; 25 26 #define DEBUG_TYPE "aarch64-stp-suppress" 27 28 namespace { 29 class AArch64StorePairSuppress : public MachineFunctionPass { 30 const AArch64InstrInfo *TII; 31 const TargetRegisterInfo *TRI; 32 const MachineRegisterInfo *MRI; 33 MachineFunction *MF; 34 TargetSchedModel SchedModel; 35 MachineTraceMetrics *Traces; 36 MachineTraceMetrics::Ensemble *MinInstr; 37 38 public: 39 static char ID; 40 AArch64StorePairSuppress() : MachineFunctionPass(ID) {} 41 42 virtual const char *getPassName() const override { 43 return "AArch64 Store Pair Suppression"; 44 } 45 46 bool runOnMachineFunction(MachineFunction &F) override; 47 48 private: 49 bool shouldAddSTPToBlock(const MachineBasicBlock *BB); 50 51 bool isNarrowFPStore(const MachineInstr &MI); 52 53 virtual void getAnalysisUsage(AnalysisUsage &AU) const override { 54 AU.setPreservesCFG(); 55 AU.addRequired<MachineTraceMetrics>(); 56 AU.addPreserved<MachineTraceMetrics>(); 57 MachineFunctionPass::getAnalysisUsage(AU); 58 } 59 }; 60 char AArch64StorePairSuppress::ID = 0; 61 } // anonymous 62 63 FunctionPass *llvm::createAArch64StorePairSuppressPass() { 64 return new AArch64StorePairSuppress(); 65 } 66 67 /// Return true if an STP can be added to this block without increasing the 68 /// critical resource height. STP is good to form in Ld/St limited blocks and 69 /// bad to form in float-point limited blocks. This is true independent of the 70 /// critical path. If the critical path is longer than the resource height, the 71 /// extra vector ops can limit physreg renaming. Otherwise, it could simply 72 /// oversaturate the vector units. 73 bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) { 74 if (!MinInstr) 75 MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); 76 77 MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB); 78 unsigned ResLength = BBTrace.getResourceLength(); 79 80 // Get the machine model's scheduling class for STPQi. 81 // Bypass TargetSchedule's SchedClass resolution since we only have an opcode. 82 unsigned SCIdx = TII->get(AArch64::STPDi).getSchedClass(); 83 const MCSchedClassDesc *SCDesc = 84 SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx); 85 86 // If a subtarget does not define resources for STPQi, bail here. 87 if (SCDesc->isValid() && !SCDesc->isVariant()) { 88 unsigned ResLenWithSTP = BBTrace.getResourceLength( 89 ArrayRef<const MachineBasicBlock *>(), SCDesc); 90 if (ResLenWithSTP > ResLength) { 91 DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber() 92 << " resources " << ResLength << " -> " << ResLenWithSTP 93 << "\n"); 94 return false; 95 } 96 } 97 return true; 98 } 99 100 /// Return true if this is a floating-point store smaller than the V reg. On 101 /// cyclone, these require a vector shuffle before storing a pair. 102 /// Ideally we would call getMatchingPairOpcode() and have the machine model 103 /// tell us if it's profitable with no cpu knowledge here. 104 /// 105 /// FIXME: We plan to develop a decent Target abstraction for simple loads and 106 /// stores. Until then use a nasty switch similar to AArch64LoadStoreOptimizer. 107 bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) { 108 switch (MI.getOpcode()) { 109 default: 110 return false; 111 case AArch64::STRSui: 112 case AArch64::STRDui: 113 case AArch64::STURSi: 114 case AArch64::STURDi: 115 return true; 116 } 117 } 118 119 bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) { 120 MF = &mf; 121 TII = static_cast<const AArch64InstrInfo *>(MF->getTarget().getInstrInfo()); 122 TRI = MF->getTarget().getRegisterInfo(); 123 MRI = &MF->getRegInfo(); 124 const TargetSubtargetInfo &ST = 125 MF->getTarget().getSubtarget<TargetSubtargetInfo>(); 126 SchedModel.init(*ST.getSchedModel(), &ST, TII); 127 128 Traces = &getAnalysis<MachineTraceMetrics>(); 129 MinInstr = nullptr; 130 131 DEBUG(dbgs() << "*** " << getPassName() << ": " << MF->getName() << '\n'); 132 133 if (!SchedModel.hasInstrSchedModel()) { 134 DEBUG(dbgs() << " Skipping pass: no machine model present.\n"); 135 return false; 136 } 137 138 // Check for a sequence of stores to the same base address. We don't need to 139 // precisely determine whether a store pair can be formed. But we do want to 140 // filter out most situations where we can't form store pairs to avoid 141 // computing trace metrics in those cases. 142 for (auto &MBB : *MF) { 143 bool SuppressSTP = false; 144 unsigned PrevBaseReg = 0; 145 for (auto &MI : MBB) { 146 if (!isNarrowFPStore(MI)) 147 continue; 148 unsigned BaseReg; 149 unsigned Offset; 150 if (TII->getLdStBaseRegImmOfs(&MI, BaseReg, Offset, TRI)) { 151 if (PrevBaseReg == BaseReg) { 152 // If this block can take STPs, skip ahead to the next block. 153 if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent())) 154 break; 155 // Otherwise, continue unpairing the stores in this block. 156 DEBUG(dbgs() << "Unpairing store " << MI << "\n"); 157 SuppressSTP = true; 158 TII->suppressLdStPair(&MI); 159 } 160 PrevBaseReg = BaseReg; 161 } else 162 PrevBaseReg = 0; 163 } 164 } 165 // This pass just sets some internal MachineMemOperand flags. It can't really 166 // invalidate anything. 167 return false; 168 } 169