1 //===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements hazard recognizers for scheduling on PowerPC processors. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "PPCHazardRecognizers.h" 15 #include "PPC.h" 16 #include "PPCInstrInfo.h" 17 #include "PPCTargetMachine.h" 18 #include "llvm/CodeGen/ScheduleDAG.h" 19 #include "llvm/Support/Debug.h" 20 #include "llvm/Support/ErrorHandling.h" 21 #include "llvm/Support/raw_ostream.h" 22 using namespace llvm; 23 24 #define DEBUG_TYPE "pre-RA-sched" 25 26 bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) { 27 // FIXME: Move this. 28 if (isBCTRAfterSet(SU)) 29 return true; 30 31 const MCInstrDesc *MCID = DAG->getInstrDesc(SU); 32 if (!MCID) 33 return false; 34 35 if (!MCID->mayLoad()) 36 return false; 37 38 // SU is a load; for any predecessors in this dispatch group, that are stores, 39 // and with which we have an ordering dependency, return true. 40 for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { 41 const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); 42 if (!PredMCID || !PredMCID->mayStore()) 43 continue; 44 45 if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier()) 46 continue; 47 48 for (unsigned j = 0, je = CurGroup.size(); j != je; ++j) 49 if (SU->Preds[i].getSUnit() == CurGroup[j]) 50 return true; 51 } 52 53 return false; 54 } 55 56 bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) { 57 const MCInstrDesc *MCID = DAG->getInstrDesc(SU); 58 if (!MCID) 59 return false; 60 61 if (!MCID->isBranch()) 62 return false; 63 64 // SU is a branch; for any predecessors in this dispatch group, with which we 65 // have a data dependence and set the counter register, return true. 66 for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { 67 const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); 68 if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR) 69 continue; 70 71 if (SU->Preds[i].isCtrl()) 72 continue; 73 74 for (unsigned j = 0, je = CurGroup.size(); j != je; ++j) 75 if (SU->Preds[i].getSUnit() == CurGroup[j]) 76 return true; 77 } 78 79 return false; 80 } 81 82 // FIXME: Remove this when we don't need this: 83 namespace llvm { namespace PPC { extern int getNonRecordFormOpcode(uint16_t); } } 84 85 // FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific. 86 87 bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID, 88 unsigned &NSlots) { 89 // FIXME: Indirectly, this information is contained in the itinerary, and 90 // we should derive it from there instead of separately specifying it 91 // here. 92 unsigned IIC = MCID->getSchedClass(); 93 switch (IIC) { 94 default: 95 NSlots = 1; 96 break; 97 case PPC::Sched::IIC_IntDivW: 98 case PPC::Sched::IIC_IntDivD: 99 case PPC::Sched::IIC_LdStLoadUpd: 100 case PPC::Sched::IIC_LdStLDU: 101 case PPC::Sched::IIC_LdStLFDU: 102 case PPC::Sched::IIC_LdStLFDUX: 103 case PPC::Sched::IIC_LdStLHA: 104 case PPC::Sched::IIC_LdStLHAU: 105 case PPC::Sched::IIC_LdStLWA: 106 case PPC::Sched::IIC_LdStSTDU: 107 case PPC::Sched::IIC_LdStSTFDU: 108 NSlots = 2; 109 break; 110 case PPC::Sched::IIC_LdStLoadUpdX: 111 case PPC::Sched::IIC_LdStLDUX: 112 case PPC::Sched::IIC_LdStLHAUX: 113 case PPC::Sched::IIC_LdStLWARX: 114 case PPC::Sched::IIC_LdStLDARX: 115 case PPC::Sched::IIC_LdStSTDUX: 116 case PPC::Sched::IIC_LdStSTDCX: 117 case PPC::Sched::IIC_LdStSTWCX: 118 case PPC::Sched::IIC_BrMCRX: // mtcr 119 // FIXME: Add sync/isync (here and in the itinerary). 120 NSlots = 4; 121 break; 122 } 123 124 // FIXME: record-form instructions need a different itinerary class. 125 if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1) 126 NSlots = 2; 127 128 switch (IIC) { 129 default: 130 // All multi-slot instructions must come first. 131 return NSlots > 1; 132 case PPC::Sched::IIC_BrCR: // cr logicals 133 case PPC::Sched::IIC_SprMFCR: 134 case PPC::Sched::IIC_SprMFCRF: 135 case PPC::Sched::IIC_SprMTSPR: 136 return true; 137 } 138 } 139 140 ScheduleHazardRecognizer::HazardType 141 PPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { 142 if (Stalls == 0 && isLoadAfterStore(SU)) 143 return NoopHazard; 144 145 return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); 146 } 147 148 bool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) { 149 const MCInstrDesc *MCID = DAG->getInstrDesc(SU); 150 unsigned NSlots; 151 if (MCID && mustComeFirst(MCID, NSlots) && CurSlots) 152 return true; 153 154 return ScoreboardHazardRecognizer::ShouldPreferAnother(SU); 155 } 156 157 unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) { 158 // We only need to fill out a maximum of 5 slots here: The 6th slot could 159 // only be a second branch, and otherwise the next instruction will start a 160 // new group. 161 if (isLoadAfterStore(SU) && CurSlots < 6) { 162 unsigned Directive = 163 DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective(); 164 // If we're using a special group-terminating nop, then we need only one. 165 // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready 166 if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || 167 Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9) 168 return 1; 169 170 return 5 - CurSlots; 171 } 172 173 return ScoreboardHazardRecognizer::PreEmitNoops(SU); 174 } 175 176 void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) { 177 const MCInstrDesc *MCID = DAG->getInstrDesc(SU); 178 if (MCID) { 179 if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) { 180 CurGroup.clear(); 181 CurSlots = CurBranches = 0; 182 } else { 183 DEBUG(dbgs() << "**** Adding to dispatch group: SU(" << 184 SU->NodeNum << "): "); 185 DEBUG(DAG->dumpNode(SU)); 186 187 unsigned NSlots; 188 bool MustBeFirst = mustComeFirst(MCID, NSlots); 189 190 // If this instruction must come first, but does not, then it starts a 191 // new group. 192 if (MustBeFirst && CurSlots) { 193 CurSlots = CurBranches = 0; 194 CurGroup.clear(); 195 } 196 197 CurSlots += NSlots; 198 CurGroup.push_back(SU); 199 200 if (MCID->isBranch()) 201 ++CurBranches; 202 } 203 } 204 205 return ScoreboardHazardRecognizer::EmitInstruction(SU); 206 } 207 208 void PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() { 209 return ScoreboardHazardRecognizer::AdvanceCycle(); 210 } 211 212 void PPCDispatchGroupSBHazardRecognizer::RecedeCycle() { 213 llvm_unreachable("Bottom-up scheduling not supported"); 214 } 215 216 void PPCDispatchGroupSBHazardRecognizer::Reset() { 217 CurGroup.clear(); 218 CurSlots = CurBranches = 0; 219 return ScoreboardHazardRecognizer::Reset(); 220 } 221 222 void PPCDispatchGroupSBHazardRecognizer::EmitNoop() { 223 unsigned Directive = 224 DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective(); 225 // If the group has now filled all of its slots, or if we're using a special 226 // group-terminating nop, the group is complete. 227 // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready 228 if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || 229 Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR8 || 230 CurSlots == 6) { 231 CurGroup.clear(); 232 CurSlots = CurBranches = 0; 233 } else { 234 CurGroup.push_back(nullptr); 235 ++CurSlots; 236 } 237 } 238 239 //===----------------------------------------------------------------------===// 240 // PowerPC 970 Hazard Recognizer 241 // 242 // This models the dispatch group formation of the PPC970 processor. Dispatch 243 // groups are bundles of up to five instructions that can contain various mixes 244 // of instructions. The PPC970 can dispatch a peak of 4 non-branch and one 245 // branch instruction per-cycle. 246 // 247 // There are a number of restrictions to dispatch group formation: some 248 // instructions can only be issued in the first slot of a dispatch group, & some 249 // instructions fill an entire dispatch group. Additionally, only branches can 250 // issue in the 5th (last) slot. 251 // 252 // Finally, there are a number of "structural" hazards on the PPC970. These 253 // conditions cause large performance penalties due to misprediction, recovery, 254 // and replay logic that has to happen. These cases include setting a CTR and 255 // branching through it in the same dispatch group, and storing to an address, 256 // then loading from the same address within a dispatch group. To avoid these 257 // conditions, we insert no-op instructions when appropriate. 258 // 259 // FIXME: This is missing some significant cases: 260 // 1. Modeling of microcoded instructions. 261 // 2. Handling of serialized operations. 262 // 3. Handling of the esoteric cases in "Resource-based Instruction Grouping". 263 // 264 265 PPCHazardRecognizer970::PPCHazardRecognizer970(const ScheduleDAG &DAG) 266 : DAG(DAG) { 267 EndDispatchGroup(); 268 } 269 270 void PPCHazardRecognizer970::EndDispatchGroup() { 271 DEBUG(errs() << "=== Start of dispatch group\n"); 272 NumIssued = 0; 273 274 // Structural hazard info. 275 HasCTRSet = false; 276 NumStores = 0; 277 } 278 279 280 PPCII::PPC970_Unit 281 PPCHazardRecognizer970::GetInstrType(unsigned Opcode, 282 bool &isFirst, bool &isSingle, 283 bool &isCracked, 284 bool &isLoad, bool &isStore) { 285 const MCInstrDesc &MCID = DAG.TII->get(Opcode); 286 287 isLoad = MCID.mayLoad(); 288 isStore = MCID.mayStore(); 289 290 uint64_t TSFlags = MCID.TSFlags; 291 292 isFirst = TSFlags & PPCII::PPC970_First; 293 isSingle = TSFlags & PPCII::PPC970_Single; 294 isCracked = TSFlags & PPCII::PPC970_Cracked; 295 return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask); 296 } 297 298 /// isLoadOfStoredAddress - If we have a load from the previously stored pointer 299 /// as indicated by StorePtr1/StorePtr2/StoreSize, return true. 300 bool PPCHazardRecognizer970:: 301 isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset, 302 const Value *LoadValue) const { 303 for (unsigned i = 0, e = NumStores; i != e; ++i) { 304 // Handle exact and commuted addresses. 305 if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i]) 306 return true; 307 308 // Okay, we don't have an exact match, if this is an indexed offset, see if 309 // we have overlap (which happens during fp->int conversion for example). 310 if (StoreValue[i] == LoadValue) { 311 // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check 312 // to see if the load and store actually overlap. 313 if (StoreOffset[i] < LoadOffset) { 314 if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true; 315 } else { 316 if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true; 317 } 318 } 319 } 320 return false; 321 } 322 323 /// getHazardType - We return hazard for any non-branch instruction that would 324 /// terminate the dispatch group. We turn NoopHazard for any 325 /// instructions that wouldn't terminate the dispatch group that would cause a 326 /// pipeline flush. 327 ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970:: 328 getHazardType(SUnit *SU, int Stalls) { 329 assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead"); 330 331 MachineInstr *MI = SU->getInstr(); 332 333 if (MI->isDebugValue()) 334 return NoHazard; 335 336 unsigned Opcode = MI->getOpcode(); 337 bool isFirst, isSingle, isCracked, isLoad, isStore; 338 PPCII::PPC970_Unit InstrType = 339 GetInstrType(Opcode, isFirst, isSingle, isCracked, 340 isLoad, isStore); 341 if (InstrType == PPCII::PPC970_Pseudo) return NoHazard; 342 343 // We can only issue a PPC970_First/PPC970_Single instruction (such as 344 // crand/mtspr/etc) if this is the first cycle of the dispatch group. 345 if (NumIssued != 0 && (isFirst || isSingle)) 346 return Hazard; 347 348 // If this instruction is cracked into two ops by the decoder, we know that 349 // it is not a branch and that it cannot issue if 3 other instructions are 350 // already in the dispatch group. 351 if (isCracked && NumIssued > 2) 352 return Hazard; 353 354 switch (InstrType) { 355 default: llvm_unreachable("Unknown instruction type!"); 356 case PPCII::PPC970_FXU: 357 case PPCII::PPC970_LSU: 358 case PPCII::PPC970_FPU: 359 case PPCII::PPC970_VALU: 360 case PPCII::PPC970_VPERM: 361 // We can only issue a branch as the last instruction in a group. 362 if (NumIssued == 4) return Hazard; 363 break; 364 case PPCII::PPC970_CRU: 365 // We can only issue a CR instruction in the first two slots. 366 if (NumIssued >= 2) return Hazard; 367 break; 368 case PPCII::PPC970_BRU: 369 break; 370 } 371 372 // Do not allow MTCTR and BCTRL to be in the same dispatch group. 373 if (HasCTRSet && Opcode == PPC::BCTRL) 374 return NoopHazard; 375 376 // If this is a load following a store, make sure it's not to the same or 377 // overlapping address. 378 if (isLoad && NumStores && !MI->memoperands_empty()) { 379 MachineMemOperand *MO = *MI->memoperands_begin(); 380 if (isLoadOfStoredAddress(MO->getSize(), 381 MO->getOffset(), MO->getValue())) 382 return NoopHazard; 383 } 384 385 return NoHazard; 386 } 387 388 void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { 389 MachineInstr *MI = SU->getInstr(); 390 391 if (MI->isDebugValue()) 392 return; 393 394 unsigned Opcode = MI->getOpcode(); 395 bool isFirst, isSingle, isCracked, isLoad, isStore; 396 PPCII::PPC970_Unit InstrType = 397 GetInstrType(Opcode, isFirst, isSingle, isCracked, 398 isLoad, isStore); 399 if (InstrType == PPCII::PPC970_Pseudo) return; 400 401 // Update structural hazard information. 402 if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true; 403 404 // Track the address stored to. 405 if (isStore && NumStores < 4 && !MI->memoperands_empty()) { 406 MachineMemOperand *MO = *MI->memoperands_begin(); 407 StoreSize[NumStores] = MO->getSize(); 408 StoreOffset[NumStores] = MO->getOffset(); 409 StoreValue[NumStores] = MO->getValue(); 410 ++NumStores; 411 } 412 413 if (InstrType == PPCII::PPC970_BRU || isSingle) 414 NumIssued = 4; // Terminate a d-group. 415 ++NumIssued; 416 417 // If this instruction is cracked into two ops by the decoder, remember that 418 // we issued two pieces. 419 if (isCracked) 420 ++NumIssued; 421 422 if (NumIssued == 5) 423 EndDispatchGroup(); 424 } 425 426 void PPCHazardRecognizer970::AdvanceCycle() { 427 assert(NumIssued < 5 && "Illegal dispatch group!"); 428 ++NumIssued; 429 if (NumIssued == 5) 430 EndDispatchGroup(); 431 } 432 433 void PPCHazardRecognizer970::Reset() { 434 EndDispatchGroup(); 435 } 436 437