Home | History | Annotate | Download | only in CodeGen
      1 //===- ExecutionDepsFix.cpp - Fix execution dependecy issues ----*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains the execution dependency fix pass.
     11 //
     12 // Some X86 SSE instructions like mov, and, or, xor are available in different
     13 // variants for different operand types. These variant instructions are
     14 // equivalent, but on Nehalem and newer cpus there is extra latency
     15 // transferring data between integer and floating point domains.  ARM cores
     16 // have similar issues when they are configured with both VFP and NEON
     17 // pipelines.
     18 //
     19 // This pass changes the variant instructions to minimize domain crossings.
     20 //
     21 //===----------------------------------------------------------------------===//
     22 
     23 #define DEBUG_TYPE "execution-fix"
     24 #include "llvm/CodeGen/MachineFunctionPass.h"
     25 #include "llvm/CodeGen/MachineRegisterInfo.h"
     26 #include "llvm/CodeGen/Passes.h"
     27 #include "llvm/Target/TargetInstrInfo.h"
     28 #include "llvm/Target/TargetMachine.h"
     29 #include "llvm/ADT/DepthFirstIterator.h"
     30 #include "llvm/Support/Allocator.h"
     31 #include "llvm/Support/Debug.h"
     32 #include "llvm/Support/raw_ostream.h"
     33 using namespace llvm;
     34 
     35 /// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
     36 /// of execution domains.
     37 ///
     38 /// An open DomainValue represents a set of instructions that can still switch
     39 /// execution domain. Multiple registers may refer to the same open
     40 /// DomainValue - they will eventually be collapsed to the same execution
     41 /// domain.
     42 ///
     43 /// A collapsed DomainValue represents a single register that has been forced
     44 /// into one of more execution domains. There is a separate collapsed
     45 /// DomainValue for each register, but it may contain multiple execution
     46 /// domains. A register value is initially created in a single execution
     47 /// domain, but if we were forced to pay the penalty of a domain crossing, we
     48 /// keep track of the fact the the register is now available in multiple
     49 /// domains.
     50 namespace {
     51 struct DomainValue {
     52   // Basic reference counting.
     53   unsigned Refs;
     54 
     55   // Bitmask of available domains. For an open DomainValue, it is the still
     56   // possible domains for collapsing. For a collapsed DomainValue it is the
     57   // domains where the register is available for free.
     58   unsigned AvailableDomains;
     59 
     60   // Position of the last defining instruction.
     61   unsigned Dist;
     62 
     63   // Twiddleable instructions using or defining these registers.
     64   SmallVector<MachineInstr*, 8> Instrs;
     65 
     66   // A collapsed DomainValue has no instructions to twiddle - it simply keeps
     67   // track of the domains where the registers are already available.
     68   bool isCollapsed() const { return Instrs.empty(); }
     69 
     70   // Is domain available?
     71   bool hasDomain(unsigned domain) const {
     72     return AvailableDomains & (1u << domain);
     73   }
     74 
     75   // Mark domain as available.
     76   void addDomain(unsigned domain) {
     77     AvailableDomains |= 1u << domain;
     78   }
     79 
     80   // Restrict to a single domain available.
     81   void setSingleDomain(unsigned domain) {
     82     AvailableDomains = 1u << domain;
     83   }
     84 
     85   // Return bitmask of domains that are available and in mask.
     86   unsigned getCommonDomains(unsigned mask) const {
     87     return AvailableDomains & mask;
     88   }
     89 
     90   // First domain available.
     91   unsigned getFirstDomain() const {
     92     return CountTrailingZeros_32(AvailableDomains);
     93   }
     94 
     95   DomainValue() { clear(); }
     96 
     97   void clear() {
     98     Refs = AvailableDomains = Dist = 0;
     99     Instrs.clear();
    100   }
    101 };
    102 }
    103 
    104 namespace {
    105 class ExeDepsFix : public MachineFunctionPass {
    106   static char ID;
    107   SpecificBumpPtrAllocator<DomainValue> Allocator;
    108   SmallVector<DomainValue*,16> Avail;
    109 
    110   const TargetRegisterClass *const RC;
    111   MachineFunction *MF;
    112   const TargetInstrInfo *TII;
    113   const TargetRegisterInfo *TRI;
    114   MachineBasicBlock *MBB;
    115   std::vector<int> AliasMap;
    116   const unsigned NumRegs;
    117   DomainValue **LiveRegs;
    118   typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap;
    119   LiveOutMap LiveOuts;
    120   unsigned Distance;
    121 
    122 public:
    123   ExeDepsFix(const TargetRegisterClass *rc)
    124     : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {}
    125 
    126   virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    127     AU.setPreservesAll();
    128     MachineFunctionPass::getAnalysisUsage(AU);
    129   }
    130 
    131   virtual bool runOnMachineFunction(MachineFunction &MF);
    132 
    133   virtual const char *getPassName() const {
    134     return "SSE execution domain fixup";
    135   }
    136 
    137 private:
    138   // Register mapping.
    139   int RegIndex(unsigned Reg);
    140 
    141   // DomainValue allocation.
    142   DomainValue *Alloc(int domain = -1);
    143   void Recycle(DomainValue*);
    144 
    145   // LiveRegs manipulations.
    146   void SetLiveReg(int rx, DomainValue *DV);
    147   void Kill(int rx);
    148   void Force(int rx, unsigned domain);
    149   void Collapse(DomainValue *dv, unsigned domain);
    150   bool Merge(DomainValue *A, DomainValue *B);
    151 
    152   void enterBasicBlock();
    153   void visitGenericInstr(MachineInstr*);
    154   void visitSoftInstr(MachineInstr*, unsigned mask);
    155   void visitHardInstr(MachineInstr*, unsigned domain);
    156 };
    157 }
    158 
    159 char ExeDepsFix::ID = 0;
    160 
    161 /// Translate TRI register number to an index into our smaller tables of
    162 /// interesting registers. Return -1 for boring registers.
    163 int ExeDepsFix::RegIndex(unsigned Reg) {
    164   assert(Reg < AliasMap.size() && "Invalid register");
    165   return AliasMap[Reg];
    166 }
    167 
    168 DomainValue *ExeDepsFix::Alloc(int domain) {
    169   DomainValue *dv = Avail.empty() ?
    170                       new(Allocator.Allocate()) DomainValue :
    171                       Avail.pop_back_val();
    172   dv->Dist = Distance;
    173   if (domain >= 0)
    174     dv->addDomain(domain);
    175   return dv;
    176 }
    177 
    178 void ExeDepsFix::Recycle(DomainValue *dv) {
    179   assert(dv && "Cannot recycle NULL");
    180   dv->clear();
    181   Avail.push_back(dv);
    182 }
    183 
    184 /// Set LiveRegs[rx] = dv, updating reference counts.
    185 void ExeDepsFix::SetLiveReg(int rx, DomainValue *dv) {
    186   assert(unsigned(rx) < NumRegs && "Invalid index");
    187   if (!LiveRegs) {
    188     LiveRegs = new DomainValue*[NumRegs];
    189     std::fill(LiveRegs, LiveRegs+NumRegs, (DomainValue*)0);
    190   }
    191 
    192   if (LiveRegs[rx] == dv)
    193     return;
    194   if (LiveRegs[rx]) {
    195     assert(LiveRegs[rx]->Refs && "Bad refcount");
    196     if (--LiveRegs[rx]->Refs == 0) Recycle(LiveRegs[rx]);
    197   }
    198   LiveRegs[rx] = dv;
    199   if (dv) ++dv->Refs;
    200 }
    201 
    202 // Kill register rx, recycle or collapse any DomainValue.
    203 void ExeDepsFix::Kill(int rx) {
    204   assert(unsigned(rx) < NumRegs && "Invalid index");
    205   if (!LiveRegs || !LiveRegs[rx]) return;
    206 
    207   // Before killing the last reference to an open DomainValue, collapse it to
    208   // the first available domain.
    209   if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->isCollapsed())
    210     Collapse(LiveRegs[rx], LiveRegs[rx]->getFirstDomain());
    211   else
    212     SetLiveReg(rx, 0);
    213 }
    214 
    215 /// Force register rx into domain.
    216 void ExeDepsFix::Force(int rx, unsigned domain) {
    217   assert(unsigned(rx) < NumRegs && "Invalid index");
    218   DomainValue *dv;
    219   if (LiveRegs && (dv = LiveRegs[rx])) {
    220     if (dv->isCollapsed())
    221       dv->addDomain(domain);
    222     else if (dv->hasDomain(domain))
    223       Collapse(dv, domain);
    224     else {
    225       // This is an incompatible open DomainValue. Collapse it to whatever and
    226       // force the new value into domain. This costs a domain crossing.
    227       Collapse(dv, dv->getFirstDomain());
    228       assert(LiveRegs[rx] && "Not live after collapse?");
    229       LiveRegs[rx]->addDomain(domain);
    230     }
    231   } else {
    232     // Set up basic collapsed DomainValue.
    233     SetLiveReg(rx, Alloc(domain));
    234   }
    235 }
    236 
    237 /// Collapse open DomainValue into given domain. If there are multiple
    238 /// registers using dv, they each get a unique collapsed DomainValue.
    239 void ExeDepsFix::Collapse(DomainValue *dv, unsigned domain) {
    240   assert(dv->hasDomain(domain) && "Cannot collapse");
    241 
    242   // Collapse all the instructions.
    243   while (!dv->Instrs.empty())
    244     TII->setExecutionDomain(dv->Instrs.pop_back_val(), domain);
    245   dv->setSingleDomain(domain);
    246 
    247   // If there are multiple users, give them new, unique DomainValues.
    248   if (LiveRegs && dv->Refs > 1)
    249     for (unsigned rx = 0; rx != NumRegs; ++rx)
    250       if (LiveRegs[rx] == dv)
    251         SetLiveReg(rx, Alloc(domain));
    252 }
    253 
    254 /// Merge - All instructions and registers in B are moved to A, and B is
    255 /// released.
    256 bool ExeDepsFix::Merge(DomainValue *A, DomainValue *B) {
    257   assert(!A->isCollapsed() && "Cannot merge into collapsed");
    258   assert(!B->isCollapsed() && "Cannot merge from collapsed");
    259   if (A == B)
    260     return true;
    261   // Restrict to the domains that A and B have in common.
    262   unsigned common = A->getCommonDomains(B->AvailableDomains);
    263   if (!common)
    264     return false;
    265   A->AvailableDomains = common;
    266   A->Dist = std::max(A->Dist, B->Dist);
    267   A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
    268   for (unsigned rx = 0; rx != NumRegs; ++rx)
    269     if (LiveRegs[rx] == B)
    270       SetLiveReg(rx, A);
    271   return true;
    272 }
    273 
    274 void ExeDepsFix::enterBasicBlock() {
    275   // Try to coalesce live-out registers from predecessors.
    276   for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
    277          e = MBB->livein_end(); i != e; ++i) {
    278     int rx = RegIndex(*i);
    279     if (rx < 0) continue;
    280     for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
    281            pe = MBB->pred_end(); pi != pe; ++pi) {
    282       LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
    283       if (fi == LiveOuts.end()) continue;
    284       DomainValue *pdv = fi->second[rx];
    285       if (!pdv) continue;
    286       if (!LiveRegs || !LiveRegs[rx]) {
    287         SetLiveReg(rx, pdv);
    288         continue;
    289       }
    290 
    291       // We have a live DomainValue from more than one predecessor.
    292       if (LiveRegs[rx]->isCollapsed()) {
    293         // We are already collapsed, but predecessor is not. Force him.
    294         unsigned domain = LiveRegs[rx]->getFirstDomain();
    295         if (!pdv->isCollapsed() && pdv->hasDomain(domain))
    296           Collapse(pdv, domain);
    297         continue;
    298       }
    299 
    300       // Currently open, merge in predecessor.
    301       if (!pdv->isCollapsed())
    302         Merge(LiveRegs[rx], pdv);
    303       else
    304         Force(rx, pdv->getFirstDomain());
    305     }
    306   }
    307 }
    308 
    309 // A hard instruction only works in one domain. All input registers will be
    310 // forced into that domain.
    311 void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
    312   // Collapse all uses.
    313   for (unsigned i = mi->getDesc().getNumDefs(),
    314                 e = mi->getDesc().getNumOperands(); i != e; ++i) {
    315     MachineOperand &mo = mi->getOperand(i);
    316     if (!mo.isReg()) continue;
    317     int rx = RegIndex(mo.getReg());
    318     if (rx < 0) continue;
    319     Force(rx, domain);
    320   }
    321 
    322   // Kill all defs and force them.
    323   for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
    324     MachineOperand &mo = mi->getOperand(i);
    325     if (!mo.isReg()) continue;
    326     int rx = RegIndex(mo.getReg());
    327     if (rx < 0) continue;
    328     Kill(rx);
    329     Force(rx, domain);
    330   }
    331 }
    332 
    333 // A soft instruction can be changed to work in other domains given by mask.
    334 void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
    335   // Bitmask of available domains for this instruction after taking collapsed
    336   // operands into account.
    337   unsigned available = mask;
    338 
    339   // Scan the explicit use operands for incoming domains.
    340   SmallVector<int, 4> used;
    341   if (LiveRegs)
    342     for (unsigned i = mi->getDesc().getNumDefs(),
    343                   e = mi->getDesc().getNumOperands(); i != e; ++i) {
    344       MachineOperand &mo = mi->getOperand(i);
    345       if (!mo.isReg()) continue;
    346       int rx = RegIndex(mo.getReg());
    347       if (rx < 0) continue;
    348       if (DomainValue *dv = LiveRegs[rx]) {
    349         // Bitmask of domains that dv and available have in common.
    350         unsigned common = dv->getCommonDomains(available);
    351         // Is it possible to use this collapsed register for free?
    352         if (dv->isCollapsed()) {
    353           // Restrict available domains to the ones in common with the operand.
    354           // If there are no common domains, we must pay the cross-domain
    355           // penalty for this operand.
    356           if (common) available = common;
    357         } else if (common)
    358           // Open DomainValue is compatible, save it for merging.
    359           used.push_back(rx);
    360         else
    361           // Open DomainValue is not compatible with instruction. It is useless
    362           // now.
    363           Kill(rx);
    364       }
    365     }
    366 
    367   // If the collapsed operands force a single domain, propagate the collapse.
    368   if (isPowerOf2_32(available)) {
    369     unsigned domain = CountTrailingZeros_32(available);
    370     TII->setExecutionDomain(mi, domain);
    371     visitHardInstr(mi, domain);
    372     return;
    373   }
    374 
    375   // Kill off any remaining uses that don't match available, and build a list of
    376   // incoming DomainValues that we want to merge.
    377   SmallVector<DomainValue*,4> doms;
    378   for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
    379     int rx = *i;
    380     DomainValue *dv = LiveRegs[rx];
    381     // This useless DomainValue could have been missed above.
    382     if (!dv->getCommonDomains(available)) {
    383       Kill(*i);
    384       continue;
    385     }
    386     // sorted, uniqued insert.
    387     bool inserted = false;
    388     for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end();
    389            i != e && !inserted; ++i) {
    390       if (dv == *i)
    391         inserted = true;
    392       else if (dv->Dist < (*i)->Dist) {
    393         inserted = true;
    394         doms.insert(i, dv);
    395       }
    396     }
    397     if (!inserted)
    398       doms.push_back(dv);
    399   }
    400 
    401   // doms are now sorted in order of appearance. Try to merge them all, giving
    402   // priority to the latest ones.
    403   DomainValue *dv = 0;
    404   while (!doms.empty()) {
    405     if (!dv) {
    406       dv = doms.pop_back_val();
    407       continue;
    408     }
    409 
    410     DomainValue *latest = doms.pop_back_val();
    411     if (Merge(dv, latest)) continue;
    412 
    413     // If latest didn't merge, it is useless now. Kill all registers using it.
    414     for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i)
    415       if (LiveRegs[*i] == latest)
    416         Kill(*i);
    417   }
    418 
    419   // dv is the DomainValue we are going to use for this instruction.
    420   if (!dv)
    421     dv = Alloc();
    422   dv->Dist = Distance;
    423   dv->AvailableDomains = available;
    424   dv->Instrs.push_back(mi);
    425 
    426   // Finally set all defs and non-collapsed uses to dv.
    427   for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
    428     MachineOperand &mo = mi->getOperand(i);
    429     if (!mo.isReg()) continue;
    430     int rx = RegIndex(mo.getReg());
    431     if (rx < 0) continue;
    432     if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) {
    433       Kill(rx);
    434       SetLiveReg(rx, dv);
    435     }
    436   }
    437 }
    438 
    439 void ExeDepsFix::visitGenericInstr(MachineInstr *mi) {
    440   // Process explicit defs, kill any relevant registers redefined.
    441   for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
    442     MachineOperand &mo = mi->getOperand(i);
    443     if (!mo.isReg()) continue;
    444     int rx = RegIndex(mo.getReg());
    445     if (rx < 0) continue;
    446     Kill(rx);
    447   }
    448 }
    449 
    450 bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
    451   MF = &mf;
    452   TII = MF->getTarget().getInstrInfo();
    453   TRI = MF->getTarget().getRegisterInfo();
    454   MBB = 0;
    455   LiveRegs = 0;
    456   Distance = 0;
    457   assert(NumRegs == RC->getNumRegs() && "Bad regclass");
    458 
    459   // If no relevant registers are used in the function, we can skip it
    460   // completely.
    461   bool anyregs = false;
    462   for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
    463        I != E; ++I)
    464     if (MF->getRegInfo().isPhysRegUsed(*I)) {
    465       anyregs = true;
    466       break;
    467     }
    468   if (!anyregs) return false;
    469 
    470   // Initialize the AliasMap on the first use.
    471   if (AliasMap.empty()) {
    472     // Given a PhysReg, AliasMap[PhysReg] is either the relevant index into RC,
    473     // or -1.
    474     AliasMap.resize(TRI->getNumRegs(), -1);
    475     for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
    476       for (const unsigned *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI)
    477         AliasMap[*AI] = i;
    478   }
    479 
    480   MachineBasicBlock *Entry = MF->begin();
    481   SmallPtrSet<MachineBasicBlock*, 16> Visited;
    482   for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> >
    483          DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
    484          DFI != DFE; ++DFI) {
    485     MBB = *DFI;
    486     enterBasicBlock();
    487     for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
    488         ++I) {
    489       MachineInstr *mi = I;
    490       if (mi->isDebugValue()) continue;
    491       ++Distance;
    492       std::pair<uint16_t, uint16_t> domp = TII->getExecutionDomain(mi);
    493       if (domp.first)
    494         if (domp.second)
    495           visitSoftInstr(mi, domp.second);
    496         else
    497           visitHardInstr(mi, domp.first);
    498       else if (LiveRegs)
    499         visitGenericInstr(mi);
    500     }
    501 
    502     // Save live registers at end of MBB - used by enterBasicBlock().
    503     if (LiveRegs)
    504       LiveOuts.insert(std::make_pair(MBB, LiveRegs));
    505     LiveRegs = 0;
    506   }
    507 
    508   // Clear the LiveOuts vectors. Should we also collapse any remaining
    509   // DomainValues?
    510   for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end();
    511          i != e; ++i)
    512     delete[] i->second;
    513   LiveOuts.clear();
    514   Avail.clear();
    515   Allocator.DestroyAll();
    516 
    517   return false;
    518 }
    519 
    520 FunctionPass *
    521 llvm::createExecutionDependencyFixPass(const TargetRegisterClass *RC) {
    522   return new ExeDepsFix(RC);
    523 }
    524