1 //===--- HexagonEarlyIfConv.cpp -------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This implements a Hexagon-specific if-conversion pass that runs on the 11 // SSA form. 12 // In SSA it is not straightforward to represent instructions that condi- 13 // tionally define registers, since a conditionally-defined register may 14 // only be used under the same condition on which the definition was based. 15 // To avoid complications of this nature, this patch will only generate 16 // predicated stores, and speculate other instructions from the "if-conver- 17 // ted" block. 18 // The code will recognize CFG patterns where a block with a conditional 19 // branch "splits" into a "true block" and a "false block". Either of these 20 // could be omitted (in case of a triangle, for example). 21 // If after conversion of the side block(s) the CFG allows it, the resul- 22 // ting blocks may be merged. If the "join" block contained PHI nodes, they 23 // will be replaced with MUX (or MUX-like) instructions to maintain the 24 // semantics of the PHI. 25 // 26 // Example: 27 // 28 // %vreg40<def> = L2_loadrub_io %vreg39<kill>, 1 29 // %vreg41<def> = S2_tstbit_i %vreg40<kill>, 0 30 // J2_jumpt %vreg41<kill>, <BB#5>, %PC<imp-def,dead> 31 // J2_jump <BB#4>, %PC<imp-def,dead> 32 // Successors according to CFG: BB#4(62) BB#5(62) 33 // 34 // BB#4: derived from LLVM BB %if.then 35 // Predecessors according to CFG: BB#3 36 // %vreg11<def> = A2_addp %vreg6, %vreg10 37 // S2_storerd_io %vreg32, 16, %vreg11 38 // Successors according to CFG: BB#5 39 // 40 // BB#5: derived from LLVM BB %if.end 41 // Predecessors according to CFG: BB#3 BB#4 42 // %vreg12<def> = PHI %vreg6, <BB#3>, %vreg11, <BB#4> 43 // %vreg13<def> = A2_addp %vreg7, %vreg12 44 // %vreg42<def> = C2_cmpeqi %vreg9, 10 45 // J2_jumpf %vreg42<kill>, <BB#3>, %PC<imp-def,dead> 46 // J2_jump <BB#6>, %PC<imp-def,dead> 47 // Successors according to CFG: BB#6(4) BB#3(124) 48 // 49 // would become: 50 // 51 // %vreg40<def> = L2_loadrub_io %vreg39<kill>, 1 52 // %vreg41<def> = S2_tstbit_i %vreg40<kill>, 0 53 // spec-> %vreg11<def> = A2_addp %vreg6, %vreg10 54 // pred-> S2_pstorerdf_io %vreg41, %vreg32, 16, %vreg11 55 // %vreg46<def> = MUX64_rr %vreg41, %vreg6, %vreg11 56 // %vreg13<def> = A2_addp %vreg7, %vreg46 57 // %vreg42<def> = C2_cmpeqi %vreg9, 10 58 // J2_jumpf %vreg42<kill>, <BB#3>, %PC<imp-def,dead> 59 // J2_jump <BB#6>, %PC<imp-def,dead> 60 // Successors according to CFG: BB#6 BB#3 61 62 #define DEBUG_TYPE "hexagon-eif" 63 64 #include "llvm/ADT/DenseSet.h" 65 #include "llvm/ADT/SetVector.h" 66 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 67 #include "llvm/CodeGen/MachineDominators.h" 68 #include "llvm/CodeGen/MachineFunctionPass.h" 69 #include "llvm/CodeGen/MachineInstrBuilder.h" 70 #include "llvm/CodeGen/MachineLoopInfo.h" 71 #include "llvm/CodeGen/MachineRegisterInfo.h" 72 #include "llvm/CodeGen/Passes.h" 73 #include "llvm/Support/CommandLine.h" 74 #include "llvm/Support/Debug.h" 75 #include "llvm/Support/raw_ostream.h" 76 #include "llvm/Target/TargetInstrInfo.h" 77 #include "llvm/Target/TargetMachine.h" 78 #include "HexagonTargetMachine.h" 79 80 #include <functional> 81 #include <set> 82 #include <vector> 83 84 using namespace llvm; 85 86 namespace llvm { 87 FunctionPass *createHexagonEarlyIfConversion(); 88 void initializeHexagonEarlyIfConversionPass(PassRegistry& Registry); 89 } 90 91 namespace { 92 cl::opt<bool> EnableHexagonBP("enable-hexagon-br-prob", cl::Hidden, 93 cl::init(false), cl::desc("Enable branch probability info")); 94 cl::opt<unsigned> SizeLimit("eif-limit", cl::init(6), cl::Hidden, 95 cl::desc("Size limit in Hexagon early if-conversion")); 96 97 struct PrintMB { 98 PrintMB(const MachineBasicBlock *B) : MB(B) {} 99 const MachineBasicBlock *MB; 100 }; 101 raw_ostream &operator<< (raw_ostream &OS, const PrintMB &P) { 102 if (!P.MB) 103 return OS << "<none>"; 104 return OS << '#' << P.MB->getNumber(); 105 } 106 107 struct FlowPattern { 108 FlowPattern() : SplitB(0), TrueB(0), FalseB(0), JoinB(0), PredR(0) {} 109 FlowPattern(MachineBasicBlock *B, unsigned PR, MachineBasicBlock *TB, 110 MachineBasicBlock *FB, MachineBasicBlock *JB) 111 : SplitB(B), TrueB(TB), FalseB(FB), JoinB(JB), PredR(PR) {} 112 113 MachineBasicBlock *SplitB; 114 MachineBasicBlock *TrueB, *FalseB, *JoinB; 115 unsigned PredR; 116 }; 117 struct PrintFP { 118 PrintFP(const FlowPattern &P, const TargetRegisterInfo &T) 119 : FP(P), TRI(T) {} 120 const FlowPattern &FP; 121 const TargetRegisterInfo &TRI; 122 friend raw_ostream &operator<< (raw_ostream &OS, const PrintFP &P); 123 }; 124 raw_ostream &operator<<(raw_ostream &OS, 125 const PrintFP &P) LLVM_ATTRIBUTE_UNUSED; 126 raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P) { 127 OS << "{ SplitB:" << PrintMB(P.FP.SplitB) 128 << ", PredR:" << PrintReg(P.FP.PredR, &P.TRI) 129 << ", TrueB:" << PrintMB(P.FP.TrueB) << ", FalseB:" 130 << PrintMB(P.FP.FalseB) 131 << ", JoinB:" << PrintMB(P.FP.JoinB) << " }"; 132 return OS; 133 } 134 135 class HexagonEarlyIfConversion : public MachineFunctionPass { 136 public: 137 static char ID; 138 HexagonEarlyIfConversion() : MachineFunctionPass(ID), 139 TII(0), TRI(0), MFN(0), MRI(0), MDT(0), MLI(0) { 140 initializeHexagonEarlyIfConversionPass(*PassRegistry::getPassRegistry()); 141 } 142 const char *getPassName() const override { 143 return "Hexagon early if conversion"; 144 } 145 void getAnalysisUsage(AnalysisUsage &AU) const override { 146 AU.addRequired<MachineBranchProbabilityInfo>(); 147 AU.addRequired<MachineDominatorTree>(); 148 AU.addPreserved<MachineDominatorTree>(); 149 AU.addRequired<MachineLoopInfo>(); 150 MachineFunctionPass::getAnalysisUsage(AU); 151 } 152 bool runOnMachineFunction(MachineFunction &MF) override; 153 154 private: 155 typedef DenseSet<MachineBasicBlock*> BlockSetType; 156 157 bool isPreheader(const MachineBasicBlock *B) const; 158 bool matchFlowPattern(MachineBasicBlock *B, MachineLoop *L, 159 FlowPattern &FP); 160 bool visitBlock(MachineBasicBlock *B, MachineLoop *L); 161 bool visitLoop(MachineLoop *L); 162 163 bool hasEHLabel(const MachineBasicBlock *B) const; 164 bool hasUncondBranch(const MachineBasicBlock *B) const; 165 bool isValidCandidate(const MachineBasicBlock *B) const; 166 bool usesUndefVReg(const MachineInstr *MI) const; 167 bool isValid(const FlowPattern &FP) const; 168 unsigned countPredicateDefs(const MachineBasicBlock *B) const; 169 unsigned computePhiCost(MachineBasicBlock *B) const; 170 bool isProfitable(const FlowPattern &FP) const; 171 bool isPredicableStore(const MachineInstr *MI) const; 172 bool isSafeToSpeculate(const MachineInstr *MI) const; 173 174 unsigned getCondStoreOpcode(unsigned Opc, bool IfTrue) const; 175 void predicateInstr(MachineBasicBlock *ToB, MachineBasicBlock::iterator At, 176 MachineInstr *MI, unsigned PredR, bool IfTrue); 177 void predicateBlockNB(MachineBasicBlock *ToB, 178 MachineBasicBlock::iterator At, MachineBasicBlock *FromB, 179 unsigned PredR, bool IfTrue); 180 181 void updatePhiNodes(MachineBasicBlock *WhereB, const FlowPattern &FP); 182 void convert(const FlowPattern &FP); 183 184 void removeBlock(MachineBasicBlock *B); 185 void eliminatePhis(MachineBasicBlock *B); 186 void replacePhiEdges(MachineBasicBlock *OldB, MachineBasicBlock *NewB); 187 void mergeBlocks(MachineBasicBlock *PredB, MachineBasicBlock *SuccB); 188 void simplifyFlowGraph(const FlowPattern &FP); 189 190 const TargetInstrInfo *TII; 191 const TargetRegisterInfo *TRI; 192 MachineFunction *MFN; 193 MachineRegisterInfo *MRI; 194 MachineDominatorTree *MDT; 195 MachineLoopInfo *MLI; 196 BlockSetType Deleted; 197 const MachineBranchProbabilityInfo *MBPI; 198 }; 199 200 char HexagonEarlyIfConversion::ID = 0; 201 } 202 203 INITIALIZE_PASS(HexagonEarlyIfConversion, "hexagon-eif", 204 "Hexagon early if conversion", false, false) 205 206 bool HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock *B) const { 207 if (B->succ_size() != 1) 208 return false; 209 MachineBasicBlock *SB = *B->succ_begin(); 210 MachineLoop *L = MLI->getLoopFor(SB); 211 return L && SB == L->getHeader(); 212 } 213 214 215 bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B, 216 MachineLoop *L, FlowPattern &FP) { 217 DEBUG(dbgs() << "Checking flow pattern at BB#" << B->getNumber() << "\n"); 218 219 // Interested only in conditional branches, no .new, no new-value, etc. 220 // Check the terminators directly, it's easier than handling all responses 221 // from AnalyzeBranch. 222 MachineBasicBlock *TB = 0, *FB = 0; 223 MachineBasicBlock::const_iterator T1I = B->getFirstTerminator(); 224 if (T1I == B->end()) 225 return false; 226 unsigned Opc = T1I->getOpcode(); 227 if (Opc != Hexagon::J2_jumpt && Opc != Hexagon::J2_jumpf) 228 return false; 229 unsigned PredR = T1I->getOperand(0).getReg(); 230 231 // Get the layout successor, or 0 if B does not have one. 232 MachineFunction::iterator NextBI = std::next(MachineFunction::iterator(B)); 233 MachineBasicBlock *NextB = (NextBI != MFN->end()) ? &*NextBI : 0; 234 235 MachineBasicBlock *T1B = T1I->getOperand(1).getMBB(); 236 MachineBasicBlock::const_iterator T2I = std::next(T1I); 237 // The second terminator should be an unconditional branch. 238 assert(T2I == B->end() || T2I->getOpcode() == Hexagon::J2_jump); 239 MachineBasicBlock *T2B = (T2I == B->end()) ? NextB 240 : T2I->getOperand(0).getMBB(); 241 if (T1B == T2B) { 242 // XXX merge if T1B == NextB, or convert branch to unconditional. 243 // mark as diamond with both sides equal? 244 return false; 245 } 246 // Loop could be null for both. 247 if (MLI->getLoopFor(T1B) != L || MLI->getLoopFor(T2B) != L) 248 return false; 249 250 // Record the true/false blocks in such a way that "true" means "if (PredR)", 251 // and "false" means "if (!PredR)". 252 if (Opc == Hexagon::J2_jumpt) 253 TB = T1B, FB = T2B; 254 else 255 TB = T2B, FB = T1B; 256 257 if (!MDT->properlyDominates(B, TB) || !MDT->properlyDominates(B, FB)) 258 return false; 259 260 // Detect triangle first. In case of a triangle, one of the blocks TB/FB 261 // can fall through into the other, in other words, it will be executed 262 // in both cases. We only want to predicate the block that is executed 263 // conditionally. 264 unsigned TNP = TB->pred_size(), FNP = FB->pred_size(); 265 unsigned TNS = TB->succ_size(), FNS = FB->succ_size(); 266 267 // A block is predicable if it has one predecessor (it must be B), and 268 // it has a single successor. In fact, the block has to end either with 269 // an unconditional branch (which can be predicated), or with a fall- 270 // through. 271 bool TOk = (TNP == 1) && (TNS == 1); 272 bool FOk = (FNP == 1) && (FNS == 1); 273 274 // If neither is predicable, there is nothing interesting. 275 if (!TOk && !FOk) 276 return false; 277 278 MachineBasicBlock *TSB = (TNS > 0) ? *TB->succ_begin() : 0; 279 MachineBasicBlock *FSB = (FNS > 0) ? *FB->succ_begin() : 0; 280 MachineBasicBlock *JB = 0; 281 282 if (TOk) { 283 if (FOk) { 284 if (TSB == FSB) 285 JB = TSB; 286 // Diamond: "if (P) then TB; else FB;". 287 } else { 288 // TOk && !FOk 289 if (TSB == FB) { 290 JB = FB; 291 FB = 0; 292 } 293 } 294 } else { 295 // !TOk && FOk (at least one must be true by now). 296 if (FSB == TB) { 297 JB = TB; 298 TB = 0; 299 } 300 } 301 // Don't try to predicate loop preheaders. 302 if ((TB && isPreheader(TB)) || (FB && isPreheader(FB))) { 303 DEBUG(dbgs() << "One of blocks " << PrintMB(TB) << ", " << PrintMB(FB) 304 << " is a loop preheader. Skipping.\n"); 305 return false; 306 } 307 308 FP = FlowPattern(B, PredR, TB, FB, JB); 309 DEBUG(dbgs() << "Detected " << PrintFP(FP, *TRI) << "\n"); 310 return true; 311 } 312 313 314 // KLUDGE: HexagonInstrInfo::AnalyzeBranch won't work on a block that 315 // contains EH_LABEL. 316 bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock *B) const { 317 for (auto &I : *B) 318 if (I.isEHLabel()) 319 return true; 320 return false; 321 } 322 323 324 // KLUDGE: HexagonInstrInfo::AnalyzeBranch may be unable to recognize 325 // that a block can never fall-through. 326 bool HexagonEarlyIfConversion::hasUncondBranch(const MachineBasicBlock *B) 327 const { 328 MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end(); 329 while (I != E) { 330 if (I->isBarrier()) 331 return true; 332 ++I; 333 } 334 return false; 335 } 336 337 338 bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B) 339 const { 340 if (!B) 341 return true; 342 if (B->isEHPad() || B->hasAddressTaken()) 343 return false; 344 if (B->succ_size() == 0) 345 return false; 346 347 for (auto &MI : *B) { 348 if (MI.isDebugValue()) 349 continue; 350 if (MI.isConditionalBranch()) 351 return false; 352 unsigned Opc = MI.getOpcode(); 353 bool IsJMP = (Opc == Hexagon::J2_jump); 354 if (!isPredicableStore(&MI) && !IsJMP && !isSafeToSpeculate(&MI)) 355 return false; 356 // Look for predicate registers defined by this instruction. It's ok 357 // to speculate such an instruction, but the predicate register cannot 358 // be used outside of this block (or else it won't be possible to 359 // update the use of it after predication). PHI uses will be updated 360 // to use a result of a MUX, and a MUX cannot be created for predicate 361 // registers. 362 for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) { 363 if (!MO->isReg() || !MO->isDef()) 364 continue; 365 unsigned R = MO->getReg(); 366 if (!TargetRegisterInfo::isVirtualRegister(R)) 367 continue; 368 if (MRI->getRegClass(R) != &Hexagon::PredRegsRegClass) 369 continue; 370 for (auto U = MRI->use_begin(R); U != MRI->use_end(); ++U) 371 if (U->getParent()->isPHI()) 372 return false; 373 } 374 } 375 return true; 376 } 377 378 379 bool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr *MI) const { 380 for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { 381 if (!MO->isReg() || !MO->isUse()) 382 continue; 383 unsigned R = MO->getReg(); 384 if (!TargetRegisterInfo::isVirtualRegister(R)) 385 continue; 386 const MachineInstr *DefI = MRI->getVRegDef(R); 387 // "Undefined" virtual registers are actually defined via IMPLICIT_DEF. 388 assert(DefI && "Expecting a reaching def in MRI"); 389 if (DefI->isImplicitDef()) 390 return true; 391 } 392 return false; 393 } 394 395 396 bool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const { 397 if (hasEHLabel(FP.SplitB)) // KLUDGE: see function definition 398 return false; 399 if (FP.TrueB && !isValidCandidate(FP.TrueB)) 400 return false; 401 if (FP.FalseB && !isValidCandidate(FP.FalseB)) 402 return false; 403 // Check the PHIs in the join block. If any of them use a register 404 // that is defined as IMPLICIT_DEF, do not convert this. This can 405 // legitimately happen if one side of the split never executes, but 406 // the compiler is unable to prove it. That side may then seem to 407 // provide an "undef" value to the join block, however it will never 408 // execute at run-time. If we convert this case, the "undef" will 409 // be used in a MUX instruction, and that may seem like actually 410 // using an undefined value to other optimizations. This could lead 411 // to trouble further down the optimization stream, cause assertions 412 // to fail, etc. 413 if (FP.JoinB) { 414 const MachineBasicBlock &B = *FP.JoinB; 415 for (auto &MI : B) { 416 if (!MI.isPHI()) 417 break; 418 if (usesUndefVReg(&MI)) 419 return false; 420 unsigned DefR = MI.getOperand(0).getReg(); 421 const TargetRegisterClass *RC = MRI->getRegClass(DefR); 422 if (RC == &Hexagon::PredRegsRegClass) 423 return false; 424 } 425 } 426 return true; 427 } 428 429 430 unsigned HexagonEarlyIfConversion::computePhiCost(MachineBasicBlock *B) const { 431 assert(B->pred_size() <= 2); 432 if (B->pred_size() < 2) 433 return 0; 434 435 unsigned Cost = 0; 436 MachineBasicBlock::const_iterator I, E = B->getFirstNonPHI(); 437 for (I = B->begin(); I != E; ++I) { 438 const MachineOperand &RO1 = I->getOperand(1); 439 const MachineOperand &RO3 = I->getOperand(3); 440 assert(RO1.isReg() && RO3.isReg()); 441 // Must have a MUX if the phi uses a subregister. 442 if (RO1.getSubReg() != 0 || RO3.getSubReg() != 0) { 443 Cost++; 444 continue; 445 } 446 MachineInstr *Def1 = MRI->getVRegDef(RO1.getReg()); 447 MachineInstr *Def3 = MRI->getVRegDef(RO3.getReg()); 448 if (!TII->isPredicable(Def1) || !TII->isPredicable(Def3)) 449 Cost++; 450 } 451 return Cost; 452 } 453 454 455 unsigned HexagonEarlyIfConversion::countPredicateDefs( 456 const MachineBasicBlock *B) const { 457 unsigned PredDefs = 0; 458 for (auto &MI : *B) { 459 for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) { 460 if (!MO->isReg() || !MO->isDef()) 461 continue; 462 unsigned R = MO->getReg(); 463 if (!TargetRegisterInfo::isVirtualRegister(R)) 464 continue; 465 if (MRI->getRegClass(R) == &Hexagon::PredRegsRegClass) 466 PredDefs++; 467 } 468 } 469 return PredDefs; 470 } 471 472 473 bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const { 474 if (FP.TrueB && FP.FalseB) { 475 476 // Do not IfCovert if the branch is one sided. 477 if (MBPI) { 478 BranchProbability Prob(9, 10); 479 if (MBPI->getEdgeProbability(FP.SplitB, FP.TrueB) > Prob) 480 return false; 481 if (MBPI->getEdgeProbability(FP.SplitB, FP.FalseB) > Prob) 482 return false; 483 } 484 485 // If both sides are predicable, convert them if they join, and the 486 // join block has no other predecessors. 487 MachineBasicBlock *TSB = *FP.TrueB->succ_begin(); 488 MachineBasicBlock *FSB = *FP.FalseB->succ_begin(); 489 if (TSB != FSB) 490 return false; 491 if (TSB->pred_size() != 2) 492 return false; 493 } 494 495 // Calculate the total size of the predicated blocks. 496 // Assume instruction counts without branches to be the approximation of 497 // the code size. If the predicated blocks are smaller than a packet size, 498 // approximate the spare room in the packet that could be filled with the 499 // predicated/speculated instructions. 500 unsigned TS = 0, FS = 0, Spare = 0; 501 if (FP.TrueB) { 502 TS = std::distance(FP.TrueB->begin(), FP.TrueB->getFirstTerminator()); 503 if (TS < HEXAGON_PACKET_SIZE) 504 Spare += HEXAGON_PACKET_SIZE-TS; 505 } 506 if (FP.FalseB) { 507 FS = std::distance(FP.FalseB->begin(), FP.FalseB->getFirstTerminator()); 508 if (FS < HEXAGON_PACKET_SIZE) 509 Spare += HEXAGON_PACKET_SIZE-TS; 510 } 511 unsigned TotalIn = TS+FS; 512 DEBUG(dbgs() << "Total number of instructions to be predicated/speculated: " 513 << TotalIn << ", spare room: " << Spare << "\n"); 514 if (TotalIn >= SizeLimit+Spare) 515 return false; 516 517 // Count the number of PHI nodes that will need to be updated (converted 518 // to MUX). Those can be later converted to predicated instructions, so 519 // they aren't always adding extra cost. 520 // KLUDGE: Also, count the number of predicate register definitions in 521 // each block. The scheduler may increase the pressure of these and cause 522 // expensive spills (e.g. bitmnp01). 523 unsigned TotalPh = 0; 524 unsigned PredDefs = countPredicateDefs(FP.SplitB); 525 if (FP.JoinB) { 526 TotalPh = computePhiCost(FP.JoinB); 527 PredDefs += countPredicateDefs(FP.JoinB); 528 } else { 529 if (FP.TrueB && FP.TrueB->succ_size() > 0) { 530 MachineBasicBlock *SB = *FP.TrueB->succ_begin(); 531 TotalPh += computePhiCost(SB); 532 PredDefs += countPredicateDefs(SB); 533 } 534 if (FP.FalseB && FP.FalseB->succ_size() > 0) { 535 MachineBasicBlock *SB = *FP.FalseB->succ_begin(); 536 TotalPh += computePhiCost(SB); 537 PredDefs += countPredicateDefs(SB); 538 } 539 } 540 DEBUG(dbgs() << "Total number of extra muxes from converted phis: " 541 << TotalPh << "\n"); 542 if (TotalIn+TotalPh >= SizeLimit+Spare) 543 return false; 544 545 DEBUG(dbgs() << "Total number of predicate registers: " << PredDefs << "\n"); 546 if (PredDefs > 4) 547 return false; 548 549 return true; 550 } 551 552 553 bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock *B, 554 MachineLoop *L) { 555 bool Changed = false; 556 557 // Visit all dominated blocks from the same loop first, then process B. 558 MachineDomTreeNode *N = MDT->getNode(B); 559 typedef GraphTraits<MachineDomTreeNode*> GTN; 560 // We will change CFG/DT during this traversal, so take precautions to 561 // avoid problems related to invalidated iterators. In fact, processing 562 // a child C of B cannot cause another child to be removed, but it can 563 // cause a new child to be added (which was a child of C before C itself 564 // was removed. This new child C, however, would have been processed 565 // prior to processing B, so there is no need to process it again. 566 // Simply keep a list of children of B, and traverse that list. 567 typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType; 568 DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N)); 569 for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) { 570 MachineBasicBlock *SB = (*I)->getBlock(); 571 if (!Deleted.count(SB)) 572 Changed |= visitBlock(SB, L); 573 } 574 // When walking down the dominator tree, we want to traverse through 575 // blocks from nested (other) loops, because they can dominate blocks 576 // that are in L. Skip the non-L blocks only after the tree traversal. 577 if (MLI->getLoopFor(B) != L) 578 return Changed; 579 580 FlowPattern FP; 581 if (!matchFlowPattern(B, L, FP)) 582 return Changed; 583 584 if (!isValid(FP)) { 585 DEBUG(dbgs() << "Conversion is not valid\n"); 586 return Changed; 587 } 588 if (!isProfitable(FP)) { 589 DEBUG(dbgs() << "Conversion is not profitable\n"); 590 return Changed; 591 } 592 593 convert(FP); 594 simplifyFlowGraph(FP); 595 return true; 596 } 597 598 599 bool HexagonEarlyIfConversion::visitLoop(MachineLoop *L) { 600 MachineBasicBlock *HB = L ? L->getHeader() : 0; 601 DEBUG((L ? dbgs() << "Visiting loop H:" << PrintMB(HB) 602 : dbgs() << "Visiting function") << "\n"); 603 bool Changed = false; 604 if (L) { 605 for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) 606 Changed |= visitLoop(*I); 607 } 608 609 MachineBasicBlock *EntryB = GraphTraits<MachineFunction*>::getEntryNode(MFN); 610 Changed |= visitBlock(L ? HB : EntryB, L); 611 return Changed; 612 } 613 614 615 bool HexagonEarlyIfConversion::isPredicableStore(const MachineInstr *MI) 616 const { 617 // Exclude post-increment stores. Those return a value, so we cannot 618 // predicate them. 619 unsigned Opc = MI->getOpcode(); 620 using namespace Hexagon; 621 switch (Opc) { 622 // Store byte: 623 case S2_storerb_io: case S4_storerb_rr: 624 case S2_storerbabs: case S4_storeirb_io: case S2_storerbgp: 625 // Store halfword: 626 case S2_storerh_io: case S4_storerh_rr: 627 case S2_storerhabs: case S4_storeirh_io: case S2_storerhgp: 628 // Store upper halfword: 629 case S2_storerf_io: case S4_storerf_rr: 630 case S2_storerfabs: case S2_storerfgp: 631 // Store word: 632 case S2_storeri_io: case S4_storeri_rr: 633 case S2_storeriabs: case S4_storeiri_io: case S2_storerigp: 634 // Store doubleword: 635 case S2_storerd_io: case S4_storerd_rr: 636 case S2_storerdabs: case S2_storerdgp: 637 return true; 638 } 639 return false; 640 } 641 642 643 bool HexagonEarlyIfConversion::isSafeToSpeculate(const MachineInstr *MI) 644 const { 645 if (MI->mayLoad() || MI->mayStore()) 646 return false; 647 if (MI->isCall() || MI->isBarrier() || MI->isBranch()) 648 return false; 649 if (MI->hasUnmodeledSideEffects()) 650 return false; 651 652 return true; 653 } 654 655 656 unsigned HexagonEarlyIfConversion::getCondStoreOpcode(unsigned Opc, 657 bool IfTrue) const { 658 // Exclude post-increment stores. 659 using namespace Hexagon; 660 switch (Opc) { 661 case S2_storerb_io: 662 return IfTrue ? S2_pstorerbt_io : S2_pstorerbf_io; 663 case S4_storerb_rr: 664 return IfTrue ? S4_pstorerbt_rr : S4_pstorerbf_rr; 665 case S2_storerbabs: 666 case S2_storerbgp: 667 return IfTrue ? S4_pstorerbt_abs : S4_pstorerbf_abs; 668 case S4_storeirb_io: 669 return IfTrue ? S4_storeirbt_io : S4_storeirbf_io; 670 case S2_storerh_io: 671 return IfTrue ? S2_pstorerht_io : S2_pstorerhf_io; 672 case S4_storerh_rr: 673 return IfTrue ? S4_pstorerht_rr : S4_pstorerhf_rr; 674 case S2_storerhabs: 675 case S2_storerhgp: 676 return IfTrue ? S4_pstorerht_abs : S4_pstorerhf_abs; 677 case S2_storerf_io: 678 return IfTrue ? S2_pstorerft_io : S2_pstorerff_io; 679 case S4_storerf_rr: 680 return IfTrue ? S4_pstorerft_rr : S4_pstorerff_rr; 681 case S2_storerfabs: 682 case S2_storerfgp: 683 return IfTrue ? S4_pstorerft_abs : S4_pstorerff_abs; 684 case S4_storeirh_io: 685 return IfTrue ? S4_storeirht_io : S4_storeirhf_io; 686 case S2_storeri_io: 687 return IfTrue ? S2_pstorerit_io : S2_pstorerif_io; 688 case S4_storeri_rr: 689 return IfTrue ? S4_pstorerit_rr : S4_pstorerif_rr; 690 case S2_storeriabs: 691 case S2_storerigp: 692 return IfTrue ? S4_pstorerit_abs : S4_pstorerif_abs; 693 case S4_storeiri_io: 694 return IfTrue ? S4_storeirit_io : S4_storeirif_io; 695 case S2_storerd_io: 696 return IfTrue ? S2_pstorerdt_io : S2_pstorerdf_io; 697 case S4_storerd_rr: 698 return IfTrue ? S4_pstorerdt_rr : S4_pstorerdf_rr; 699 case S2_storerdabs: 700 case S2_storerdgp: 701 return IfTrue ? S4_pstorerdt_abs : S4_pstorerdf_abs; 702 } 703 llvm_unreachable("Unexpected opcode"); 704 return 0; 705 } 706 707 708 void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB, 709 MachineBasicBlock::iterator At, MachineInstr *MI, 710 unsigned PredR, bool IfTrue) { 711 DebugLoc DL; 712 if (At != ToB->end()) 713 DL = At->getDebugLoc(); 714 else if (!ToB->empty()) 715 DL = ToB->back().getDebugLoc(); 716 717 unsigned Opc = MI->getOpcode(); 718 719 if (isPredicableStore(MI)) { 720 unsigned COpc = getCondStoreOpcode(Opc, IfTrue); 721 assert(COpc); 722 MachineInstrBuilder MIB = BuildMI(*ToB, At, DL, TII->get(COpc)) 723 .addReg(PredR); 724 for (MIOperands MO(MI); MO.isValid(); ++MO) 725 MIB.addOperand(*MO); 726 727 // Set memory references. 728 MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); 729 MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); 730 MIB.setMemRefs(MMOBegin, MMOEnd); 731 732 MI->eraseFromParent(); 733 return; 734 } 735 736 if (Opc == Hexagon::J2_jump) { 737 MachineBasicBlock *TB = MI->getOperand(0).getMBB(); 738 const MCInstrDesc &D = TII->get(IfTrue ? Hexagon::J2_jumpt 739 : Hexagon::J2_jumpf); 740 BuildMI(*ToB, At, DL, D) 741 .addReg(PredR) 742 .addMBB(TB); 743 MI->eraseFromParent(); 744 return; 745 } 746 747 // Print the offending instruction unconditionally as we are about to 748 // abort. 749 dbgs() << *MI; 750 llvm_unreachable("Unexpected instruction"); 751 } 752 753 754 // Predicate/speculate non-branch instructions from FromB into block ToB. 755 // Leave the branches alone, they will be handled later. Btw, at this point 756 // FromB should have at most one branch, and it should be unconditional. 757 void HexagonEarlyIfConversion::predicateBlockNB(MachineBasicBlock *ToB, 758 MachineBasicBlock::iterator At, MachineBasicBlock *FromB, 759 unsigned PredR, bool IfTrue) { 760 DEBUG(dbgs() << "Predicating block " << PrintMB(FromB) << "\n"); 761 MachineBasicBlock::iterator End = FromB->getFirstTerminator(); 762 MachineBasicBlock::iterator I, NextI; 763 764 for (I = FromB->begin(); I != End; I = NextI) { 765 assert(!I->isPHI()); 766 NextI = std::next(I); 767 if (isSafeToSpeculate(&*I)) 768 ToB->splice(At, FromB, I); 769 else 770 predicateInstr(ToB, At, &*I, PredR, IfTrue); 771 } 772 } 773 774 775 void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB, 776 const FlowPattern &FP) { 777 // Visit all PHI nodes in the WhereB block and generate MUX instructions 778 // in the split block. Update the PHI nodes with the values of the MUX. 779 auto NonPHI = WhereB->getFirstNonPHI(); 780 for (auto I = WhereB->begin(); I != NonPHI; ++I) { 781 MachineInstr *PN = &*I; 782 // Registers and subregisters corresponding to TrueB, FalseB and SplitB. 783 unsigned TR = 0, TSR = 0, FR = 0, FSR = 0, SR = 0, SSR = 0; 784 for (int i = PN->getNumOperands()-2; i > 0; i -= 2) { 785 const MachineOperand &RO = PN->getOperand(i), &BO = PN->getOperand(i+1); 786 if (BO.getMBB() == FP.SplitB) 787 SR = RO.getReg(), SSR = RO.getSubReg(); 788 else if (BO.getMBB() == FP.TrueB) 789 TR = RO.getReg(), TSR = RO.getSubReg(); 790 else if (BO.getMBB() == FP.FalseB) 791 FR = RO.getReg(), FSR = RO.getSubReg(); 792 else 793 continue; 794 PN->RemoveOperand(i+1); 795 PN->RemoveOperand(i); 796 } 797 if (TR == 0) 798 TR = SR, TSR = SSR; 799 else if (FR == 0) 800 FR = SR, FSR = SSR; 801 assert(TR && FR); 802 803 using namespace Hexagon; 804 unsigned DR = PN->getOperand(0).getReg(); 805 const TargetRegisterClass *RC = MRI->getRegClass(DR); 806 const MCInstrDesc &D = RC == &IntRegsRegClass ? TII->get(C2_mux) 807 : TII->get(MUX64_rr); 808 809 MachineBasicBlock::iterator MuxAt = FP.SplitB->getFirstTerminator(); 810 DebugLoc DL; 811 if (MuxAt != FP.SplitB->end()) 812 DL = MuxAt->getDebugLoc(); 813 unsigned MuxR = MRI->createVirtualRegister(RC); 814 BuildMI(*FP.SplitB, MuxAt, DL, D, MuxR) 815 .addReg(FP.PredR) 816 .addReg(TR, 0, TSR) 817 .addReg(FR, 0, FSR); 818 819 PN->addOperand(MachineOperand::CreateReg(MuxR, false)); 820 PN->addOperand(MachineOperand::CreateMBB(FP.SplitB)); 821 } 822 } 823 824 825 void HexagonEarlyIfConversion::convert(const FlowPattern &FP) { 826 MachineBasicBlock *TSB = 0, *FSB = 0; 827 MachineBasicBlock::iterator OldTI = FP.SplitB->getFirstTerminator(); 828 assert(OldTI != FP.SplitB->end()); 829 DebugLoc DL = OldTI->getDebugLoc(); 830 831 if (FP.TrueB) { 832 TSB = *FP.TrueB->succ_begin(); 833 predicateBlockNB(FP.SplitB, OldTI, FP.TrueB, FP.PredR, true); 834 } 835 if (FP.FalseB) { 836 FSB = *FP.FalseB->succ_begin(); 837 MachineBasicBlock::iterator At = FP.SplitB->getFirstTerminator(); 838 predicateBlockNB(FP.SplitB, At, FP.FalseB, FP.PredR, false); 839 } 840 841 // Regenerate new terminators in the split block and update the successors. 842 // First, remember any information that may be needed later and remove the 843 // existing terminators/successors from the split block. 844 MachineBasicBlock *SSB = 0; 845 FP.SplitB->erase(OldTI, FP.SplitB->end()); 846 while (FP.SplitB->succ_size() > 0) { 847 MachineBasicBlock *T = *FP.SplitB->succ_begin(); 848 // It's possible that the split block had a successor that is not a pre- 849 // dicated block. This could only happen if there was only one block to 850 // be predicated. Example: 851 // split_b: 852 // if (p) jump true_b 853 // jump unrelated2_b 854 // unrelated1_b: 855 // ... 856 // unrelated2_b: ; can have other predecessors, so it's not "false_b" 857 // jump other_b 858 // true_b: ; only reachable from split_b, can be predicated 859 // ... 860 // 861 // Find this successor (SSB) if it exists. 862 if (T != FP.TrueB && T != FP.FalseB) { 863 assert(!SSB); 864 SSB = T; 865 } 866 FP.SplitB->removeSuccessor(FP.SplitB->succ_begin()); 867 } 868 869 // Insert new branches and update the successors of the split block. This 870 // may create unconditional branches to the layout successor, etc., but 871 // that will be cleaned up later. For now, make sure that correct code is 872 // generated. 873 if (FP.JoinB) { 874 assert(!SSB || SSB == FP.JoinB); 875 BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump)) 876 .addMBB(FP.JoinB); 877 FP.SplitB->addSuccessor(FP.JoinB); 878 } else { 879 bool HasBranch = false; 880 if (TSB) { 881 BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jumpt)) 882 .addReg(FP.PredR) 883 .addMBB(TSB); 884 FP.SplitB->addSuccessor(TSB); 885 HasBranch = true; 886 } 887 if (FSB) { 888 const MCInstrDesc &D = HasBranch ? TII->get(Hexagon::J2_jump) 889 : TII->get(Hexagon::J2_jumpf); 890 MachineInstrBuilder MIB = BuildMI(*FP.SplitB, FP.SplitB->end(), DL, D); 891 if (!HasBranch) 892 MIB.addReg(FP.PredR); 893 MIB.addMBB(FSB); 894 FP.SplitB->addSuccessor(FSB); 895 } 896 if (SSB) { 897 // This cannot happen if both TSB and FSB are set. [TF]SB are the 898 // successor blocks of the TrueB and FalseB (or null of the TrueB 899 // or FalseB block is null). SSB is the potential successor block 900 // of the SplitB that is neither TrueB nor FalseB. 901 BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump)) 902 .addMBB(SSB); 903 FP.SplitB->addSuccessor(SSB); 904 } 905 } 906 907 // What is left to do is to update the PHI nodes that could have entries 908 // referring to predicated blocks. 909 if (FP.JoinB) { 910 updatePhiNodes(FP.JoinB, FP); 911 } else { 912 if (TSB) 913 updatePhiNodes(TSB, FP); 914 if (FSB) 915 updatePhiNodes(FSB, FP); 916 // Nothing to update in SSB, since SSB's predecessors haven't changed. 917 } 918 } 919 920 921 void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) { 922 DEBUG(dbgs() << "Removing block " << PrintMB(B) << "\n"); 923 924 // Transfer the immediate dominator information from B to its descendants. 925 MachineDomTreeNode *N = MDT->getNode(B); 926 MachineDomTreeNode *IDN = N->getIDom(); 927 if (IDN) { 928 MachineBasicBlock *IDB = IDN->getBlock(); 929 typedef GraphTraits<MachineDomTreeNode*> GTN; 930 typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType; 931 DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N)); 932 for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) { 933 MachineBasicBlock *SB = (*I)->getBlock(); 934 MDT->changeImmediateDominator(SB, IDB); 935 } 936 } 937 938 while (B->succ_size() > 0) 939 B->removeSuccessor(B->succ_begin()); 940 941 for (auto I = B->pred_begin(), E = B->pred_end(); I != E; ++I) 942 (*I)->removeSuccessor(B, true); 943 944 Deleted.insert(B); 945 MDT->eraseNode(B); 946 MFN->erase(B->getIterator()); 947 } 948 949 950 void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) { 951 DEBUG(dbgs() << "Removing phi nodes from block " << PrintMB(B) << "\n"); 952 MachineBasicBlock::iterator I, NextI, NonPHI = B->getFirstNonPHI(); 953 for (I = B->begin(); I != NonPHI; I = NextI) { 954 NextI = std::next(I); 955 MachineInstr *PN = &*I; 956 assert(PN->getNumOperands() == 3 && "Invalid phi node"); 957 MachineOperand &UO = PN->getOperand(1); 958 unsigned UseR = UO.getReg(), UseSR = UO.getSubReg(); 959 unsigned DefR = PN->getOperand(0).getReg(); 960 unsigned NewR = UseR; 961 if (UseSR) { 962 // MRI.replaceVregUsesWith does not allow to update the subregister, 963 // so instead of doing the use-iteration here, create a copy into a 964 // "non-subregistered" register. 965 DebugLoc DL = PN->getDebugLoc(); 966 const TargetRegisterClass *RC = MRI->getRegClass(DefR); 967 NewR = MRI->createVirtualRegister(RC); 968 NonPHI = BuildMI(*B, NonPHI, DL, TII->get(TargetOpcode::COPY), NewR) 969 .addReg(UseR, 0, UseSR); 970 } 971 MRI->replaceRegWith(DefR, NewR); 972 B->erase(I); 973 } 974 } 975 976 977 void HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB, 978 MachineBasicBlock *NewB) { 979 for (auto I = OldB->succ_begin(), E = OldB->succ_end(); I != E; ++I) { 980 MachineBasicBlock *SB = *I; 981 MachineBasicBlock::iterator P, N = SB->getFirstNonPHI(); 982 for (P = SB->begin(); P != N; ++P) { 983 MachineInstr *PN = &*P; 984 for (MIOperands MO(PN); MO.isValid(); ++MO) 985 if (MO->isMBB() && MO->getMBB() == OldB) 986 MO->setMBB(NewB); 987 } 988 } 989 } 990 991 992 void HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock *PredB, 993 MachineBasicBlock *SuccB) { 994 DEBUG(dbgs() << "Merging blocks " << PrintMB(PredB) << " and " 995 << PrintMB(SuccB) << "\n"); 996 bool TermOk = hasUncondBranch(SuccB); 997 eliminatePhis(SuccB); 998 TII->RemoveBranch(*PredB); 999 PredB->removeSuccessor(SuccB); 1000 PredB->splice(PredB->end(), SuccB, SuccB->begin(), SuccB->end()); 1001 MachineBasicBlock::succ_iterator I, E = SuccB->succ_end(); 1002 for (I = SuccB->succ_begin(); I != E; ++I) 1003 PredB->addSuccessor(*I); 1004 PredB->normalizeSuccProbs(); 1005 replacePhiEdges(SuccB, PredB); 1006 removeBlock(SuccB); 1007 if (!TermOk) 1008 PredB->updateTerminator(); 1009 } 1010 1011 1012 void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern &FP) { 1013 if (FP.TrueB) 1014 removeBlock(FP.TrueB); 1015 if (FP.FalseB) 1016 removeBlock(FP.FalseB); 1017 1018 FP.SplitB->updateTerminator(); 1019 if (FP.SplitB->succ_size() != 1) 1020 return; 1021 1022 MachineBasicBlock *SB = *FP.SplitB->succ_begin(); 1023 if (SB->pred_size() != 1) 1024 return; 1025 1026 // By now, the split block has only one successor (SB), and SB has only 1027 // one predecessor. We can try to merge them. We will need to update ter- 1028 // minators in FP.Split+SB, and that requires working AnalyzeBranch, which 1029 // fails on Hexagon for blocks that have EH_LABELs. However, if SB ends 1030 // with an unconditional branch, we won't need to touch the terminators. 1031 if (!hasEHLabel(SB) || hasUncondBranch(SB)) 1032 mergeBlocks(FP.SplitB, SB); 1033 } 1034 1035 1036 bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) { 1037 auto &ST = MF.getSubtarget(); 1038 TII = ST.getInstrInfo(); 1039 TRI = ST.getRegisterInfo(); 1040 MFN = &MF; 1041 MRI = &MF.getRegInfo(); 1042 MDT = &getAnalysis<MachineDominatorTree>(); 1043 MLI = &getAnalysis<MachineLoopInfo>(); 1044 MBPI = EnableHexagonBP ? &getAnalysis<MachineBranchProbabilityInfo>() : 1045 nullptr; 1046 1047 Deleted.clear(); 1048 bool Changed = false; 1049 1050 for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I) 1051 Changed |= visitLoop(*I); 1052 Changed |= visitLoop(0); 1053 1054 return Changed; 1055 } 1056 1057 //===----------------------------------------------------------------------===// 1058 // Public Constructor Functions 1059 //===----------------------------------------------------------------------===// 1060 FunctionPass *llvm::createHexagonEarlyIfConversion() { 1061 return new HexagonEarlyIfConversion(); 1062 } 1063 1064