Home | History | Annotate | Download | only in PowerPC
      1 //===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements hazard recognizers for scheduling on PowerPC processors.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "PPCHazardRecognizers.h"
     15 #include "PPC.h"
     16 #include "PPCInstrInfo.h"
     17 #include "PPCTargetMachine.h"
     18 #include "llvm/CodeGen/ScheduleDAG.h"
     19 #include "llvm/Support/Debug.h"
     20 #include "llvm/Support/ErrorHandling.h"
     21 #include "llvm/Support/raw_ostream.h"
     22 using namespace llvm;
     23 
     24 #define DEBUG_TYPE "pre-RA-sched"
     25 
     26 bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) {
     27   // FIXME: Move this.
     28   if (isBCTRAfterSet(SU))
     29     return true;
     30 
     31   const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
     32   if (!MCID)
     33     return false;
     34 
     35   if (!MCID->mayLoad())
     36     return false;
     37 
     38   // SU is a load; for any predecessors in this dispatch group, that are stores,
     39   // and with which we have an ordering dependency, return true.
     40   for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
     41     const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());
     42     if (!PredMCID || !PredMCID->mayStore())
     43       continue;
     44 
     45     if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier())
     46       continue;
     47 
     48     for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
     49       if (SU->Preds[i].getSUnit() == CurGroup[j])
     50         return true;
     51   }
     52 
     53   return false;
     54 }
     55 
     56 bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) {
     57   const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
     58   if (!MCID)
     59     return false;
     60 
     61   if (!MCID->isBranch())
     62     return false;
     63 
     64   // SU is a branch; for any predecessors in this dispatch group, with which we
     65   // have a data dependence and set the counter register, return true.
     66   for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
     67     const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());
     68     if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR)
     69       continue;
     70 
     71     if (SU->Preds[i].isCtrl())
     72       continue;
     73 
     74     for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
     75       if (SU->Preds[i].getSUnit() == CurGroup[j])
     76         return true;
     77   }
     78 
     79   return false;
     80 }
     81 
     82 // FIXME: Remove this when we don't need this:
     83 namespace llvm { namespace PPC { extern int getNonRecordFormOpcode(uint16_t); } }
     84 
     85 // FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific.
     86 
     87 bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID,
     88                                                        unsigned &NSlots) {
     89   // FIXME: Indirectly, this information is contained in the itinerary, and
     90   // we should derive it from there instead of separately specifying it
     91   // here.
     92   unsigned IIC = MCID->getSchedClass();
     93   switch (IIC) {
     94   default:
     95     NSlots = 1;
     96     break;
     97   case PPC::Sched::IIC_IntDivW:
     98   case PPC::Sched::IIC_IntDivD:
     99   case PPC::Sched::IIC_LdStLoadUpd:
    100   case PPC::Sched::IIC_LdStLDU:
    101   case PPC::Sched::IIC_LdStLFDU:
    102   case PPC::Sched::IIC_LdStLFDUX:
    103   case PPC::Sched::IIC_LdStLHA:
    104   case PPC::Sched::IIC_LdStLHAU:
    105   case PPC::Sched::IIC_LdStLWA:
    106   case PPC::Sched::IIC_LdStSTDU:
    107   case PPC::Sched::IIC_LdStSTFDU:
    108     NSlots = 2;
    109     break;
    110   case PPC::Sched::IIC_LdStLoadUpdX:
    111   case PPC::Sched::IIC_LdStLDUX:
    112   case PPC::Sched::IIC_LdStLHAUX:
    113   case PPC::Sched::IIC_LdStLWARX:
    114   case PPC::Sched::IIC_LdStLDARX:
    115   case PPC::Sched::IIC_LdStSTDUX:
    116   case PPC::Sched::IIC_LdStSTDCX:
    117   case PPC::Sched::IIC_LdStSTWCX:
    118   case PPC::Sched::IIC_BrMCRX: // mtcr
    119   // FIXME: Add sync/isync (here and in the itinerary).
    120     NSlots = 4;
    121     break;
    122   }
    123 
    124   // FIXME: record-form instructions need a different itinerary class.
    125   if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1)
    126     NSlots = 2;
    127 
    128   switch (IIC) {
    129   default:
    130     // All multi-slot instructions must come first.
    131     return NSlots > 1;
    132   case PPC::Sched::IIC_BrCR: // cr logicals
    133   case PPC::Sched::IIC_SprMFCR:
    134   case PPC::Sched::IIC_SprMFCRF:
    135   case PPC::Sched::IIC_SprMTSPR:
    136     return true;
    137   }
    138 }
    139 
    140 ScheduleHazardRecognizer::HazardType
    141 PPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
    142   if (Stalls == 0 && isLoadAfterStore(SU))
    143     return NoopHazard;
    144 
    145   return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
    146 }
    147 
    148 bool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
    149   const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
    150   unsigned NSlots;
    151   if (MCID && mustComeFirst(MCID, NSlots) && CurSlots)
    152     return true;
    153 
    154   return ScoreboardHazardRecognizer::ShouldPreferAnother(SU);
    155 }
    156 
    157 unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) {
    158   // We only need to fill out a maximum of 5 slots here: The 6th slot could
    159   // only be a second branch, and otherwise the next instruction will start a
    160   // new group.
    161   if (isLoadAfterStore(SU) && CurSlots < 6) {
    162     unsigned Directive =
    163         DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
    164     // If we're using a special group-terminating nop, then we need only one.
    165     // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
    166     if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
    167         Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9)
    168       return 1;
    169 
    170     return 5 - CurSlots;
    171   }
    172 
    173   return ScoreboardHazardRecognizer::PreEmitNoops(SU);
    174 }
    175 
    176 void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) {
    177   const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
    178   if (MCID) {
    179     if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) {
    180       CurGroup.clear();
    181       CurSlots = CurBranches = 0;
    182     } else {
    183       DEBUG(dbgs() << "**** Adding to dispatch group: SU(" <<
    184                       SU->NodeNum << "): ");
    185       DEBUG(DAG->dumpNode(SU));
    186 
    187       unsigned NSlots;
    188       bool MustBeFirst = mustComeFirst(MCID, NSlots);
    189 
    190       // If this instruction must come first, but does not, then it starts a
    191       // new group.
    192       if (MustBeFirst && CurSlots) {
    193         CurSlots = CurBranches = 0;
    194         CurGroup.clear();
    195       }
    196 
    197       CurSlots += NSlots;
    198       CurGroup.push_back(SU);
    199 
    200       if (MCID->isBranch())
    201         ++CurBranches;
    202     }
    203   }
    204 
    205   return ScoreboardHazardRecognizer::EmitInstruction(SU);
    206 }
    207 
    208 void PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() {
    209   return ScoreboardHazardRecognizer::AdvanceCycle();
    210 }
    211 
    212 void PPCDispatchGroupSBHazardRecognizer::RecedeCycle() {
    213   llvm_unreachable("Bottom-up scheduling not supported");
    214 }
    215 
    216 void PPCDispatchGroupSBHazardRecognizer::Reset() {
    217   CurGroup.clear();
    218   CurSlots = CurBranches = 0;
    219   return ScoreboardHazardRecognizer::Reset();
    220 }
    221 
    222 void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
    223   unsigned Directive =
    224       DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
    225   // If the group has now filled all of its slots, or if we're using a special
    226   // group-terminating nop, the group is complete.
    227   // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
    228   if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
    229       Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR8 ||
    230       CurSlots == 6) {
    231     CurGroup.clear();
    232     CurSlots = CurBranches = 0;
    233   } else {
    234     CurGroup.push_back(nullptr);
    235     ++CurSlots;
    236   }
    237 }
    238 
    239 //===----------------------------------------------------------------------===//
    240 // PowerPC 970 Hazard Recognizer
    241 //
    242 // This models the dispatch group formation of the PPC970 processor.  Dispatch
    243 // groups are bundles of up to five instructions that can contain various mixes
    244 // of instructions.  The PPC970 can dispatch a peak of 4 non-branch and one
    245 // branch instruction per-cycle.
    246 //
    247 // There are a number of restrictions to dispatch group formation: some
    248 // instructions can only be issued in the first slot of a dispatch group, & some
    249 // instructions fill an entire dispatch group.  Additionally, only branches can
    250 // issue in the 5th (last) slot.
    251 //
    252 // Finally, there are a number of "structural" hazards on the PPC970.  These
    253 // conditions cause large performance penalties due to misprediction, recovery,
    254 // and replay logic that has to happen.  These cases include setting a CTR and
    255 // branching through it in the same dispatch group, and storing to an address,
    256 // then loading from the same address within a dispatch group.  To avoid these
    257 // conditions, we insert no-op instructions when appropriate.
    258 //
    259 // FIXME: This is missing some significant cases:
    260 //   1. Modeling of microcoded instructions.
    261 //   2. Handling of serialized operations.
    262 //   3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
    263 //
    264 
    265 PPCHazardRecognizer970::PPCHazardRecognizer970(const ScheduleDAG &DAG)
    266     : DAG(DAG) {
    267   EndDispatchGroup();
    268 }
    269 
    270 void PPCHazardRecognizer970::EndDispatchGroup() {
    271   DEBUG(errs() << "=== Start of dispatch group\n");
    272   NumIssued = 0;
    273 
    274   // Structural hazard info.
    275   HasCTRSet = false;
    276   NumStores = 0;
    277 }
    278 
    279 
    280 PPCII::PPC970_Unit
    281 PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
    282                                      bool &isFirst, bool &isSingle,
    283                                      bool &isCracked,
    284                                      bool &isLoad, bool &isStore) {
    285   const MCInstrDesc &MCID = DAG.TII->get(Opcode);
    286 
    287   isLoad  = MCID.mayLoad();
    288   isStore = MCID.mayStore();
    289 
    290   uint64_t TSFlags = MCID.TSFlags;
    291 
    292   isFirst   = TSFlags & PPCII::PPC970_First;
    293   isSingle  = TSFlags & PPCII::PPC970_Single;
    294   isCracked = TSFlags & PPCII::PPC970_Cracked;
    295   return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask);
    296 }
    297 
    298 /// isLoadOfStoredAddress - If we have a load from the previously stored pointer
    299 /// as indicated by StorePtr1/StorePtr2/StoreSize, return true.
    300 bool PPCHazardRecognizer970::
    301 isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
    302   const Value *LoadValue) const {
    303   for (unsigned i = 0, e = NumStores; i != e; ++i) {
    304     // Handle exact and commuted addresses.
    305     if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i])
    306       return true;
    307 
    308     // Okay, we don't have an exact match, if this is an indexed offset, see if
    309     // we have overlap (which happens during fp->int conversion for example).
    310     if (StoreValue[i] == LoadValue) {
    311       // Okay the base pointers match, so we have [c1+r] vs [c2+r].  Check
    312       // to see if the load and store actually overlap.
    313       if (StoreOffset[i] < LoadOffset) {
    314         if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true;
    315       } else {
    316         if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true;
    317       }
    318     }
    319   }
    320   return false;
    321 }
    322 
    323 /// getHazardType - We return hazard for any non-branch instruction that would
    324 /// terminate the dispatch group.  We turn NoopHazard for any
    325 /// instructions that wouldn't terminate the dispatch group that would cause a
    326 /// pipeline flush.
    327 ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
    328 getHazardType(SUnit *SU, int Stalls) {
    329   assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead");
    330 
    331   MachineInstr *MI = SU->getInstr();
    332 
    333   if (MI->isDebugValue())
    334     return NoHazard;
    335 
    336   unsigned Opcode = MI->getOpcode();
    337   bool isFirst, isSingle, isCracked, isLoad, isStore;
    338   PPCII::PPC970_Unit InstrType =
    339     GetInstrType(Opcode, isFirst, isSingle, isCracked,
    340                  isLoad, isStore);
    341   if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
    342 
    343   // We can only issue a PPC970_First/PPC970_Single instruction (such as
    344   // crand/mtspr/etc) if this is the first cycle of the dispatch group.
    345   if (NumIssued != 0 && (isFirst || isSingle))
    346     return Hazard;
    347 
    348   // If this instruction is cracked into two ops by the decoder, we know that
    349   // it is not a branch and that it cannot issue if 3 other instructions are
    350   // already in the dispatch group.
    351   if (isCracked && NumIssued > 2)
    352     return Hazard;
    353 
    354   switch (InstrType) {
    355   default: llvm_unreachable("Unknown instruction type!");
    356   case PPCII::PPC970_FXU:
    357   case PPCII::PPC970_LSU:
    358   case PPCII::PPC970_FPU:
    359   case PPCII::PPC970_VALU:
    360   case PPCII::PPC970_VPERM:
    361     // We can only issue a branch as the last instruction in a group.
    362     if (NumIssued == 4) return Hazard;
    363     break;
    364   case PPCII::PPC970_CRU:
    365     // We can only issue a CR instruction in the first two slots.
    366     if (NumIssued >= 2) return Hazard;
    367     break;
    368   case PPCII::PPC970_BRU:
    369     break;
    370   }
    371 
    372   // Do not allow MTCTR and BCTRL to be in the same dispatch group.
    373   if (HasCTRSet && Opcode == PPC::BCTRL)
    374     return NoopHazard;
    375 
    376   // If this is a load following a store, make sure it's not to the same or
    377   // overlapping address.
    378   if (isLoad && NumStores && !MI->memoperands_empty()) {
    379     MachineMemOperand *MO = *MI->memoperands_begin();
    380     if (isLoadOfStoredAddress(MO->getSize(),
    381                               MO->getOffset(), MO->getValue()))
    382       return NoopHazard;
    383   }
    384 
    385   return NoHazard;
    386 }
    387 
    388 void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
    389   MachineInstr *MI = SU->getInstr();
    390 
    391   if (MI->isDebugValue())
    392     return;
    393 
    394   unsigned Opcode = MI->getOpcode();
    395   bool isFirst, isSingle, isCracked, isLoad, isStore;
    396   PPCII::PPC970_Unit InstrType =
    397     GetInstrType(Opcode, isFirst, isSingle, isCracked,
    398                  isLoad, isStore);
    399   if (InstrType == PPCII::PPC970_Pseudo) return;
    400 
    401   // Update structural hazard information.
    402   if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true;
    403 
    404   // Track the address stored to.
    405   if (isStore && NumStores < 4 && !MI->memoperands_empty()) {
    406     MachineMemOperand *MO = *MI->memoperands_begin();
    407     StoreSize[NumStores] = MO->getSize();
    408     StoreOffset[NumStores] = MO->getOffset();
    409     StoreValue[NumStores] = MO->getValue();
    410     ++NumStores;
    411   }
    412 
    413   if (InstrType == PPCII::PPC970_BRU || isSingle)
    414     NumIssued = 4;  // Terminate a d-group.
    415   ++NumIssued;
    416 
    417   // If this instruction is cracked into two ops by the decoder, remember that
    418   // we issued two pieces.
    419   if (isCracked)
    420     ++NumIssued;
    421 
    422   if (NumIssued == 5)
    423     EndDispatchGroup();
    424 }
    425 
    426 void PPCHazardRecognizer970::AdvanceCycle() {
    427   assert(NumIssued < 5 && "Illegal dispatch group!");
    428   ++NumIssued;
    429   if (NumIssued == 5)
    430     EndDispatchGroup();
    431 }
    432 
    433 void PPCHazardRecognizer970::Reset() {
    434   EndDispatchGroup();
    435 }
    436 
    437