1 //===--- HexagonExpandCondsets.cpp ----------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 // Replace mux instructions with the corresponding legal instructions. 11 // It is meant to work post-SSA, but still on virtual registers. It was 12 // originally placed between register coalescing and machine instruction 13 // scheduler. 14 // In this place in the optimization sequence, live interval analysis had 15 // been performed, and the live intervals should be preserved. A large part 16 // of the code deals with preserving the liveness information. 17 // 18 // Liveness tracking aside, the main functionality of this pass is divided 19 // into two steps. The first step is to replace an instruction 20 // vreg0 = C2_mux vreg1, vreg2, vreg3 21 // with a pair of conditional transfers 22 // vreg0 = A2_tfrt vreg1, vreg2 23 // vreg0 = A2_tfrf vreg1, vreg3 24 // It is the intention that the execution of this pass could be terminated 25 // after this step, and the code generated would be functionally correct. 26 // 27 // If the uses of the source values vreg1 and vreg2 are kills, and their 28 // definitions are predicable, then in the second step, the conditional 29 // transfers will then be rewritten as predicated instructions. E.g. 30 // vreg0 = A2_or vreg1, vreg2 31 // vreg3 = A2_tfrt vreg99, vreg0<kill> 32 // will be rewritten as 33 // vreg3 = A2_port vreg99, vreg1, vreg2 34 // 35 // This replacement has two variants: "up" and "down". Consider this case: 36 // vreg0 = A2_or vreg1, vreg2 37 // ... [intervening instructions] ... 38 // vreg3 = A2_tfrt vreg99, vreg0<kill> 39 // variant "up": 40 // vreg3 = A2_port vreg99, vreg1, vreg2 41 // ... [intervening instructions, vreg0->vreg3] ... 42 // [deleted] 43 // variant "down": 44 // [deleted] 45 // ... [intervening instructions] ... 46 // vreg3 = A2_port vreg99, vreg1, vreg2 47 // 48 // Both, one or none of these variants may be valid, and checks are made 49 // to rule out inapplicable variants. 50 // 51 // As an additional optimization, before either of the two steps above is 52 // executed, the pass attempts to coalesce the target register with one of 53 // the source registers, e.g. given an instruction 54 // vreg3 = C2_mux vreg0, vreg1, vreg2 55 // vreg3 will be coalesced with either vreg1 or vreg2. If this succeeds, 56 // the instruction would then be (for example) 57 // vreg3 = C2_mux vreg0, vreg3, vreg2 58 // and, under certain circumstances, this could result in only one predicated 59 // instruction: 60 // vreg3 = A2_tfrf vreg0, vreg2 61 // 62 63 // Splitting a definition of a register into two predicated transfers 64 // creates a complication in liveness tracking. Live interval computation 65 // will see both instructions as actual definitions, and will mark the 66 // first one as dead. The definition is not actually dead, and this 67 // situation will need to be fixed. For example: 68 // vreg1<def,dead> = A2_tfrt ... ; marked as dead 69 // vreg1<def> = A2_tfrf ... 70 // 71 // Since any of the individual predicated transfers may end up getting 72 // removed (in case it is an identity copy), some pre-existing def may 73 // be marked as dead after live interval recomputation: 74 // vreg1<def,dead> = ... ; marked as dead 75 // ... 76 // vreg1<def> = A2_tfrf ... ; if A2_tfrt is removed 77 // This case happens if vreg1 was used as a source in A2_tfrt, which means 78 // that is it actually live at the A2_tfrf, and so the now dead definition 79 // of vreg1 will need to be updated to non-dead at some point. 80 // 81 // This issue could be remedied by adding implicit uses to the predicated 82 // transfers, but this will create a problem with subsequent predication, 83 // since the transfers will no longer be possible to reorder. To avoid 84 // that, the initial splitting will not add any implicit uses. These 85 // implicit uses will be added later, after predication. The extra price, 86 // however, is that finding the locations where the implicit uses need 87 // to be added, and updating the live ranges will be more involved. 88 // 89 // An additional problem appears when subregister liveness tracking is 90 // enabled. In such a scenario, the live interval for the super-register 91 // will have live ranges for each subregister (i.e. subranges). This sub- 92 // range contains all liveness information about the subregister, except 93 // for one case: a "read-undef" flag from another subregister will not 94 // be reflected: given 95 // vreg1:subreg_hireg<def,read-undef> = ... ; "undefines" subreg_loreg 96 // the subrange for subreg_loreg will not have any indication that it is 97 // undefined at this point. Calculating subregister liveness based only 98 // on the information from the subrange may create a segment which spans 99 // over such a "read-undef" flag. This would create inconsistencies in 100 // the liveness data, resulting in assertions or incorrect code. 101 // Example: 102 // vreg1:subreg_loreg<def> = ... 103 // vreg1:subreg_hireg<def, read-undef> = ... ; "undefines" subreg_loreg 104 // ... 105 // vreg1:subreg_loreg<def> = A2_tfrt ... ; may end up with imp-use 106 // ; of subreg_loreg 107 // The remedy takes advantage of the fact, that at this point we have 108 // an unconditional definition of the subregister. What this means is 109 // that any preceding value in this subregister will be overwritten, 110 // or in other words, the last use before this def is a kill. This also 111 // implies that the first of the predicated transfers at this location 112 // should not have any implicit uses. 113 // Assume for a moment that no part of the corresponding super-register 114 // is used as a source. In such case, the entire super-register can be 115 // considered undefined immediately before this instruction. Because of 116 // that, we can insert an IMPLICIT_DEF of the super-register at this 117 // location, which will cause it to be reflected in all the associated 118 // subranges. What is important here is that if an IMPLICIT_DEF of 119 // subreg_loreg was used, we would lose the indication that subreg_hireg 120 // is also considered undefined. This could lead to having implicit uses 121 // incorrectly added. 122 // 123 // What is left is the two cases when the super-register is used as a 124 // source. 125 // * Case 1: the used part is the same as the one that is defined: 126 // vreg1<def> = ... 127 // ... 128 // vreg1:subreg_loreg<def,read-undef> = C2_mux ..., vreg1:subreg_loreg 129 // In the end, the subreg_loreg should be marked as live at the point of 130 // the splitting: 131 // vreg1:subreg_loreg<def,read-undef> = A2_tfrt ; should have imp-use 132 // vreg1:subreg_loreg<def,read-undef> = A2_tfrf ; should have imp-use 133 // Hence, an IMPLICIT_DEF of only vreg1:subreg_hireg would be sufficient. 134 // * Case 2: the used part does not overlap the part being defined: 135 // vreg1<def> = ... 136 // ... 137 // vreg1:subreg_loreg<def,read-undef> = C2_mux ..., vreg1:subreg_hireg 138 // For this case, we insert an IMPLICIT_DEF of vreg1:subreg_hireg after 139 // the C2_mux. 140 141 #define DEBUG_TYPE "expand-condsets" 142 143 #include "HexagonTargetMachine.h" 144 #include "llvm/ADT/SetVector.h" 145 #include "llvm/CodeGen/Passes.h" 146 #include "llvm/CodeGen/LiveInterval.h" 147 #include "llvm/CodeGen/LiveIntervalAnalysis.h" 148 #include "llvm/CodeGen/MachineDominators.h" 149 #include "llvm/CodeGen/MachineFunction.h" 150 #include "llvm/CodeGen/MachineInstrBuilder.h" 151 #include "llvm/CodeGen/MachineRegisterInfo.h" 152 #include "llvm/Target/TargetInstrInfo.h" 153 #include "llvm/Target/TargetMachine.h" 154 #include "llvm/Target/TargetRegisterInfo.h" 155 #include "llvm/Support/CommandLine.h" 156 #include "llvm/Support/Debug.h" 157 #include "llvm/Support/raw_ostream.h" 158 159 #include <algorithm> 160 #include <iterator> 161 #include <set> 162 #include <utility> 163 164 using namespace llvm; 165 166 static cl::opt<unsigned> OptTfrLimit("expand-condsets-tfr-limit", 167 cl::init(~0U), cl::Hidden, cl::desc("Max number of mux expansions")); 168 static cl::opt<unsigned> OptCoaLimit("expand-condsets-coa-limit", 169 cl::init(~0U), cl::Hidden, cl::desc("Max number of segment coalescings")); 170 171 namespace llvm { 172 void initializeHexagonExpandCondsetsPass(PassRegistry&); 173 FunctionPass *createHexagonExpandCondsets(); 174 } 175 176 namespace { 177 class HexagonExpandCondsets : public MachineFunctionPass { 178 public: 179 static char ID; 180 HexagonExpandCondsets() : 181 MachineFunctionPass(ID), HII(0), TRI(0), MRI(0), 182 LIS(0), CoaLimitActive(false), 183 TfrLimitActive(false), CoaCounter(0), TfrCounter(0) { 184 if (OptCoaLimit.getPosition()) 185 CoaLimitActive = true, CoaLimit = OptCoaLimit; 186 if (OptTfrLimit.getPosition()) 187 TfrLimitActive = true, TfrLimit = OptTfrLimit; 188 initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry()); 189 } 190 191 const char *getPassName() const override { 192 return "Hexagon Expand Condsets"; 193 } 194 void getAnalysisUsage(AnalysisUsage &AU) const override { 195 AU.addRequired<LiveIntervals>(); 196 AU.addPreserved<LiveIntervals>(); 197 AU.addPreserved<SlotIndexes>(); 198 AU.addRequired<MachineDominatorTree>(); 199 AU.addPreserved<MachineDominatorTree>(); 200 MachineFunctionPass::getAnalysisUsage(AU); 201 } 202 bool runOnMachineFunction(MachineFunction &MF) override; 203 204 private: 205 const HexagonInstrInfo *HII; 206 const TargetRegisterInfo *TRI; 207 MachineDominatorTree *MDT; 208 MachineRegisterInfo *MRI; 209 LiveIntervals *LIS; 210 std::set<MachineInstr*> LocalImpDefs; 211 212 bool CoaLimitActive, TfrLimitActive; 213 unsigned CoaLimit, TfrLimit, CoaCounter, TfrCounter; 214 215 struct RegisterRef { 216 RegisterRef(const MachineOperand &Op) : Reg(Op.getReg()), 217 Sub(Op.getSubReg()) {} 218 RegisterRef(unsigned R = 0, unsigned S = 0) : Reg(R), Sub(S) {} 219 bool operator== (RegisterRef RR) const { 220 return Reg == RR.Reg && Sub == RR.Sub; 221 } 222 bool operator!= (RegisterRef RR) const { return !operator==(RR); } 223 bool operator< (RegisterRef RR) const { 224 return Reg < RR.Reg || (Reg == RR.Reg && Sub < RR.Sub); 225 } 226 unsigned Reg, Sub; 227 }; 228 229 typedef DenseMap<unsigned,unsigned> ReferenceMap; 230 enum { Sub_Low = 0x1, Sub_High = 0x2, Sub_None = (Sub_Low | Sub_High) }; 231 enum { Exec_Then = 0x10, Exec_Else = 0x20 }; 232 unsigned getMaskForSub(unsigned Sub); 233 bool isCondset(const MachineInstr &MI); 234 LaneBitmask getLaneMask(unsigned Reg, unsigned Sub); 235 236 void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec); 237 bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec); 238 239 void removeImpDefSegments(LiveRange &Range); 240 void updateDeadsInRange(unsigned Reg, LaneBitmask LM, LiveRange &Range); 241 void updateKillFlags(unsigned Reg); 242 void updateDeadFlags(unsigned Reg); 243 void recalculateLiveInterval(unsigned Reg); 244 void removeInstr(MachineInstr &MI); 245 void updateLiveness(std::set<unsigned> &RegSet, bool Recalc, 246 bool UpdateKills, bool UpdateDeads); 247 248 unsigned getCondTfrOpcode(const MachineOperand &SO, bool Cond); 249 MachineInstr *genCondTfrFor(MachineOperand &SrcOp, 250 MachineBasicBlock::iterator At, unsigned DstR, 251 unsigned DstSR, const MachineOperand &PredOp, bool PredSense, 252 bool ReadUndef, bool ImpUse); 253 bool split(MachineInstr &MI, std::set<unsigned> &UpdRegs); 254 bool splitInBlock(MachineBasicBlock &B, std::set<unsigned> &UpdRegs); 255 256 bool isPredicable(MachineInstr *MI); 257 MachineInstr *getReachingDefForPred(RegisterRef RD, 258 MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond); 259 bool canMoveOver(MachineInstr &MI, ReferenceMap &Defs, ReferenceMap &Uses); 260 bool canMoveMemTo(MachineInstr &MI, MachineInstr &ToI, bool IsDown); 261 void predicateAt(const MachineOperand &DefOp, MachineInstr &MI, 262 MachineBasicBlock::iterator Where, 263 const MachineOperand &PredOp, bool Cond, 264 std::set<unsigned> &UpdRegs); 265 void renameInRange(RegisterRef RO, RegisterRef RN, unsigned PredR, 266 bool Cond, MachineBasicBlock::iterator First, 267 MachineBasicBlock::iterator Last); 268 bool predicate(MachineInstr &TfrI, bool Cond, std::set<unsigned> &UpdRegs); 269 bool predicateInBlock(MachineBasicBlock &B, 270 std::set<unsigned> &UpdRegs); 271 272 bool isIntReg(RegisterRef RR, unsigned &BW); 273 bool isIntraBlocks(LiveInterval &LI); 274 bool coalesceRegisters(RegisterRef R1, RegisterRef R2); 275 bool coalesceSegments(MachineFunction &MF); 276 }; 277 } 278 279 char HexagonExpandCondsets::ID = 0; 280 281 INITIALIZE_PASS_BEGIN(HexagonExpandCondsets, "expand-condsets", 282 "Hexagon Expand Condsets", false, false) 283 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 284 INITIALIZE_PASS_DEPENDENCY(SlotIndexes) 285 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 286 INITIALIZE_PASS_END(HexagonExpandCondsets, "expand-condsets", 287 "Hexagon Expand Condsets", false, false) 288 289 unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) { 290 switch (Sub) { 291 case Hexagon::subreg_loreg: 292 return Sub_Low; 293 case Hexagon::subreg_hireg: 294 return Sub_High; 295 case Hexagon::NoSubRegister: 296 return Sub_None; 297 } 298 llvm_unreachable("Invalid subregister"); 299 } 300 301 bool HexagonExpandCondsets::isCondset(const MachineInstr &MI) { 302 unsigned Opc = MI.getOpcode(); 303 switch (Opc) { 304 case Hexagon::C2_mux: 305 case Hexagon::C2_muxii: 306 case Hexagon::C2_muxir: 307 case Hexagon::C2_muxri: 308 case Hexagon::MUX64_rr: 309 return true; 310 break; 311 } 312 return false; 313 } 314 315 316 LaneBitmask HexagonExpandCondsets::getLaneMask(unsigned Reg, unsigned Sub) { 317 assert(TargetRegisterInfo::isVirtualRegister(Reg)); 318 return Sub != 0 ? TRI->getSubRegIndexLaneMask(Sub) 319 : MRI->getMaxLaneMaskForVReg(Reg); 320 } 321 322 323 void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map, 324 unsigned Exec) { 325 unsigned Mask = getMaskForSub(RR.Sub) | Exec; 326 ReferenceMap::iterator F = Map.find(RR.Reg); 327 if (F == Map.end()) 328 Map.insert(std::make_pair(RR.Reg, Mask)); 329 else 330 F->second |= Mask; 331 } 332 333 334 bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map, 335 unsigned Exec) { 336 ReferenceMap::iterator F = Map.find(RR.Reg); 337 if (F == Map.end()) 338 return false; 339 unsigned Mask = getMaskForSub(RR.Sub) | Exec; 340 if (Mask & F->second) 341 return true; 342 return false; 343 } 344 345 346 void HexagonExpandCondsets::updateKillFlags(unsigned Reg) { 347 auto KillAt = [this,Reg] (SlotIndex K, LaneBitmask LM) -> void { 348 // Set the <kill> flag on a use of Reg whose lane mask is contained in LM. 349 MachineInstr *MI = LIS->getInstructionFromIndex(K); 350 for (auto &Op : MI->operands()) { 351 if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg) 352 continue; 353 LaneBitmask SLM = getLaneMask(Reg, Op.getSubReg()); 354 if ((SLM & LM) == SLM) { 355 // Only set the kill flag on the first encountered use of Reg in this 356 // instruction. 357 Op.setIsKill(true); 358 break; 359 } 360 } 361 }; 362 363 LiveInterval &LI = LIS->getInterval(Reg); 364 for (auto I = LI.begin(), E = LI.end(); I != E; ++I) { 365 if (!I->end.isRegister()) 366 continue; 367 // Do not mark the end of the segment as <kill>, if the next segment 368 // starts with a predicated instruction. 369 auto NextI = std::next(I); 370 if (NextI != E && NextI->start.isRegister()) { 371 MachineInstr *DefI = LIS->getInstructionFromIndex(NextI->start); 372 if (HII->isPredicated(*DefI)) 373 continue; 374 } 375 bool WholeReg = true; 376 if (LI.hasSubRanges()) { 377 auto EndsAtI = [I] (LiveInterval::SubRange &S) -> bool { 378 LiveRange::iterator F = S.find(I->end); 379 return F != S.end() && I->end == F->end; 380 }; 381 // Check if all subranges end at I->end. If so, make sure to kill 382 // the whole register. 383 for (LiveInterval::SubRange &S : LI.subranges()) { 384 if (EndsAtI(S)) 385 KillAt(I->end, S.LaneMask); 386 else 387 WholeReg = false; 388 } 389 } 390 if (WholeReg) 391 KillAt(I->end, MRI->getMaxLaneMaskForVReg(Reg)); 392 } 393 } 394 395 396 void HexagonExpandCondsets::removeImpDefSegments(LiveRange &Range) { 397 auto StartImpDef = [this] (LiveRange::Segment &S) -> bool { 398 return S.start.isRegister() && 399 LocalImpDefs.count(LIS->getInstructionFromIndex(S.start)); 400 }; 401 Range.segments.erase(std::remove_if(Range.begin(), Range.end(), StartImpDef), 402 Range.end()); 403 } 404 405 void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM, 406 LiveRange &Range) { 407 assert(TargetRegisterInfo::isVirtualRegister(Reg)); 408 if (Range.empty()) 409 return; 410 411 auto IsRegDef = [this,Reg,LM] (MachineOperand &Op) -> bool { 412 if (!Op.isReg() || !Op.isDef()) 413 return false; 414 unsigned DR = Op.getReg(), DSR = Op.getSubReg(); 415 if (!TargetRegisterInfo::isVirtualRegister(DR) || DR != Reg) 416 return false; 417 LaneBitmask SLM = getLaneMask(DR, DSR); 418 return (SLM & LM) != 0; 419 }; 420 421 // The splitting step will create pairs of predicated definitions without 422 // any implicit uses (since implicit uses would interfere with predication). 423 // This can cause the reaching defs to become dead after live range 424 // recomputation, even though they are not really dead. 425 // We need to identify predicated defs that need implicit uses, and 426 // dead defs that are not really dead, and correct both problems. 427 428 SetVector<MachineBasicBlock*> Defs; 429 auto Dominate = [this] (SetVector<MachineBasicBlock*> &Defs, 430 MachineBasicBlock *Dest) -> bool { 431 for (MachineBasicBlock *D : Defs) 432 if (D != Dest && MDT->dominates(D, Dest)) 433 return true; 434 435 MachineBasicBlock *Entry = &Dest->getParent()->front(); 436 SetVector<MachineBasicBlock*> Work(Dest->pred_begin(), Dest->pred_end()); 437 for (unsigned i = 0; i < Work.size(); ++i) { 438 MachineBasicBlock *B = Work[i]; 439 if (Defs.count(B)) 440 continue; 441 if (B == Entry) 442 return false; 443 for (auto *P : B->predecessors()) 444 Work.insert(P); 445 } 446 return true; 447 }; 448 449 // First, try to extend live range within individual basic blocks. This 450 // will leave us only with dead defs that do not reach any predicated 451 // defs in the same block. 452 SmallVector<SlotIndex,4> PredDefs; 453 for (auto &Seg : Range) { 454 if (!Seg.start.isRegister()) 455 continue; 456 MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start); 457 if (LocalImpDefs.count(DefI)) 458 continue; 459 Defs.insert(DefI->getParent()); 460 if (HII->isPredicated(*DefI)) 461 PredDefs.push_back(Seg.start); 462 } 463 for (auto &SI : PredDefs) { 464 MachineBasicBlock *BB = LIS->getMBBFromIndex(SI); 465 if (Range.extendInBlock(LIS->getMBBStartIdx(BB), SI)) 466 SI = SlotIndex(); 467 } 468 469 // Calculate reachability for those predicated defs that were not handled 470 // by the in-block extension. 471 SmallVector<SlotIndex,4> ExtTo; 472 for (auto &SI : PredDefs) { 473 if (!SI.isValid()) 474 continue; 475 MachineBasicBlock *BB = LIS->getMBBFromIndex(SI); 476 if (BB->pred_empty()) 477 continue; 478 // If the defs from this range reach SI via all predecessors, it is live. 479 if (Dominate(Defs, BB)) 480 ExtTo.push_back(SI); 481 } 482 LIS->extendToIndices(Range, ExtTo); 483 484 // Remove <dead> flags from all defs that are not dead after live range 485 // extension, and collect all def operands. They will be used to generate 486 // the necessary implicit uses. 487 std::set<RegisterRef> DefRegs; 488 for (auto &Seg : Range) { 489 if (!Seg.start.isRegister()) 490 continue; 491 MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start); 492 if (LocalImpDefs.count(DefI)) 493 continue; 494 for (auto &Op : DefI->operands()) { 495 if (Seg.start.isDead() || !IsRegDef(Op)) 496 continue; 497 DefRegs.insert(Op); 498 Op.setIsDead(false); 499 } 500 } 501 502 503 // Finally, add implicit uses to each predicated def that is reached 504 // by other defs. Remove segments started by implicit-defs first, since 505 // they do not define registers. 506 removeImpDefSegments(Range); 507 508 for (auto &Seg : Range) { 509 if (!Seg.start.isRegister() || !Range.liveAt(Seg.start.getPrevSlot())) 510 continue; 511 MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start); 512 if (!HII->isPredicated(*DefI)) 513 continue; 514 MachineFunction &MF = *DefI->getParent()->getParent(); 515 // Construct the set of all necessary implicit uses, based on the def 516 // operands in the instruction. 517 std::set<RegisterRef> ImpUses; 518 for (auto &Op : DefI->operands()) 519 if (Op.isReg() && Op.isDef() && DefRegs.count(Op)) 520 ImpUses.insert(Op); 521 for (RegisterRef R : ImpUses) 522 MachineInstrBuilder(MF, DefI).addReg(R.Reg, RegState::Implicit, R.Sub); 523 } 524 } 525 526 527 void HexagonExpandCondsets::updateDeadFlags(unsigned Reg) { 528 LiveInterval &LI = LIS->getInterval(Reg); 529 if (LI.hasSubRanges()) { 530 for (LiveInterval::SubRange &S : LI.subranges()) { 531 updateDeadsInRange(Reg, S.LaneMask, S); 532 LIS->shrinkToUses(S, Reg); 533 // LI::shrinkToUses will add segments started by implicit-defs. 534 // Remove them again. 535 removeImpDefSegments(S); 536 } 537 LI.clear(); 538 LIS->constructMainRangeFromSubranges(LI); 539 } else { 540 updateDeadsInRange(Reg, MRI->getMaxLaneMaskForVReg(Reg), LI); 541 } 542 } 543 544 545 void HexagonExpandCondsets::recalculateLiveInterval(unsigned Reg) { 546 LIS->removeInterval(Reg); 547 LIS->createAndComputeVirtRegInterval(Reg); 548 } 549 550 void HexagonExpandCondsets::removeInstr(MachineInstr &MI) { 551 LIS->RemoveMachineInstrFromMaps(MI); 552 MI.eraseFromParent(); 553 } 554 555 556 void HexagonExpandCondsets::updateLiveness(std::set<unsigned> &RegSet, 557 bool Recalc, bool UpdateKills, bool UpdateDeads) { 558 UpdateKills |= UpdateDeads; 559 for (auto R : RegSet) { 560 if (Recalc) 561 recalculateLiveInterval(R); 562 if (UpdateKills) 563 MRI->clearKillFlags(R); 564 if (UpdateDeads) 565 updateDeadFlags(R); 566 // Fixing <dead> flags may extend live ranges, so reset <kill> flags 567 // after that. 568 if (UpdateKills) 569 updateKillFlags(R); 570 LIS->getInterval(R).verify(); 571 } 572 } 573 574 575 /// Get the opcode for a conditional transfer of the value in SO (source 576 /// operand). The condition (true/false) is given in Cond. 577 unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO, 578 bool IfTrue) { 579 using namespace Hexagon; 580 if (SO.isReg()) { 581 unsigned PhysR; 582 RegisterRef RS = SO; 583 if (TargetRegisterInfo::isVirtualRegister(RS.Reg)) { 584 const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg); 585 assert(VC->begin() != VC->end() && "Empty register class"); 586 PhysR = *VC->begin(); 587 } else { 588 assert(TargetRegisterInfo::isPhysicalRegister(RS.Reg)); 589 PhysR = RS.Reg; 590 } 591 unsigned PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub); 592 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS); 593 switch (RC->getSize()) { 594 case 4: 595 return IfTrue ? A2_tfrt : A2_tfrf; 596 case 8: 597 return IfTrue ? A2_tfrpt : A2_tfrpf; 598 } 599 llvm_unreachable("Invalid register operand"); 600 } 601 if (SO.isImm() || SO.isFPImm()) 602 return IfTrue ? C2_cmoveit : C2_cmoveif; 603 llvm_unreachable("Unexpected source operand"); 604 } 605 606 607 /// Generate a conditional transfer, copying the value SrcOp to the 608 /// destination register DstR:DstSR, and using the predicate register from 609 /// PredOp. The Cond argument specifies whether the predicate is to be 610 /// if(PredOp), or if(!PredOp). 611 MachineInstr *HexagonExpandCondsets::genCondTfrFor(MachineOperand &SrcOp, 612 MachineBasicBlock::iterator At, 613 unsigned DstR, unsigned DstSR, const MachineOperand &PredOp, 614 bool PredSense, bool ReadUndef, bool ImpUse) { 615 MachineInstr *MI = SrcOp.getParent(); 616 MachineBasicBlock &B = *At->getParent(); 617 const DebugLoc &DL = MI->getDebugLoc(); 618 619 // Don't avoid identity copies here (i.e. if the source and the destination 620 // are the same registers). It is actually better to generate them here, 621 // since this would cause the copy to potentially be predicated in the next 622 // step. The predication will remove such a copy if it is unable to 623 /// predicate. 624 625 unsigned Opc = getCondTfrOpcode(SrcOp, PredSense); 626 unsigned State = RegState::Define | (ReadUndef ? RegState::Undef : 0); 627 MachineInstrBuilder MIB = BuildMI(B, At, DL, HII->get(Opc)) 628 .addReg(DstR, State, DstSR) 629 .addOperand(PredOp) 630 .addOperand(SrcOp); 631 632 // We don't want any kills yet. 633 MIB->clearKillInfo(); 634 DEBUG(dbgs() << "created an initial copy: " << *MIB); 635 return &*MIB; 636 } 637 638 639 /// Replace a MUX instruction MI with a pair A2_tfrt/A2_tfrf. This function 640 /// performs all necessary changes to complete the replacement. 641 bool HexagonExpandCondsets::split(MachineInstr &MI, 642 std::set<unsigned> &UpdRegs) { 643 if (TfrLimitActive) { 644 if (TfrCounter >= TfrLimit) 645 return false; 646 TfrCounter++; 647 } 648 DEBUG(dbgs() << "\nsplitting BB#" << MI.getParent()->getNumber() << ": " 649 << MI); 650 MachineOperand &MD = MI.getOperand(0); // Definition 651 MachineOperand &MP = MI.getOperand(1); // Predicate register 652 MachineOperand &MS1 = MI.getOperand(2); // Source value #1 653 MachineOperand &MS2 = MI.getOperand(3); // Source value #2 654 assert(MD.isDef()); 655 unsigned DR = MD.getReg(), DSR = MD.getSubReg(); 656 bool ReadUndef = MD.isUndef(); 657 MachineBasicBlock::iterator At = MI; 658 659 if (ReadUndef && DSR != 0 && MRI->shouldTrackSubRegLiveness(DR)) { 660 unsigned NewSR = 0; 661 MachineBasicBlock::iterator DefAt = At; 662 bool SameReg = (MS1.isReg() && DR == MS1.getReg()) || 663 (MS2.isReg() && DR == MS2.getReg()); 664 if (SameReg) { 665 NewSR = (DSR == Hexagon::subreg_loreg) ? Hexagon::subreg_hireg 666 : Hexagon::subreg_loreg; 667 // Advance the insertion point if the subregisters differ between 668 // the source and the target (with the same super-register). 669 // Note: this case has never occured during tests. 670 if ((MS1.isReg() && NewSR == MS1.getSubReg()) || 671 (MS2.isReg() && NewSR == MS2.getSubReg())) 672 ++DefAt; 673 } 674 // Use "At", since "DefAt" may be end(). 675 MachineBasicBlock &B = *At->getParent(); 676 DebugLoc DL = At->getDebugLoc(); 677 auto ImpD = BuildMI(B, DefAt, DL, HII->get(TargetOpcode::IMPLICIT_DEF)) 678 .addReg(DR, RegState::Define, NewSR); 679 LIS->InsertMachineInstrInMaps(*ImpD); 680 LocalImpDefs.insert(&*ImpD); 681 } 682 683 // First, create the two invididual conditional transfers, and add each 684 // of them to the live intervals information. Do that first and then remove 685 // the old instruction from live intervals. 686 MachineInstr *TfrT = 687 genCondTfrFor(MI.getOperand(2), At, DR, DSR, MP, true, ReadUndef, false); 688 MachineInstr *TfrF = 689 genCondTfrFor(MI.getOperand(3), At, DR, DSR, MP, false, ReadUndef, true); 690 LIS->InsertMachineInstrInMaps(*TfrT); 691 LIS->InsertMachineInstrInMaps(*TfrF); 692 693 // Will need to recalculate live intervals for all registers in MI. 694 for (auto &Op : MI.operands()) 695 if (Op.isReg()) 696 UpdRegs.insert(Op.getReg()); 697 698 removeInstr(MI); 699 return true; 700 } 701 702 703 /// Split all MUX instructions in the given block into pairs of conditional 704 /// transfers. 705 bool HexagonExpandCondsets::splitInBlock(MachineBasicBlock &B, 706 std::set<unsigned> &UpdRegs) { 707 bool Changed = false; 708 MachineBasicBlock::iterator I, E, NextI; 709 for (I = B.begin(), E = B.end(); I != E; I = NextI) { 710 NextI = std::next(I); 711 if (isCondset(*I)) 712 Changed |= split(*I, UpdRegs); 713 } 714 return Changed; 715 } 716 717 718 bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) { 719 if (HII->isPredicated(*MI) || !HII->isPredicable(*MI)) 720 return false; 721 if (MI->hasUnmodeledSideEffects() || MI->mayStore()) 722 return false; 723 // Reject instructions with multiple defs (e.g. post-increment loads). 724 bool HasDef = false; 725 for (auto &Op : MI->operands()) { 726 if (!Op.isReg() || !Op.isDef()) 727 continue; 728 if (HasDef) 729 return false; 730 HasDef = true; 731 } 732 for (auto &Mo : MI->memoperands()) 733 if (Mo->isVolatile()) 734 return false; 735 return true; 736 } 737 738 739 /// Find the reaching definition for a predicated use of RD. The RD is used 740 /// under the conditions given by PredR and Cond, and this function will ignore 741 /// definitions that set RD under the opposite conditions. 742 MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD, 743 MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond) { 744 MachineBasicBlock &B = *UseIt->getParent(); 745 MachineBasicBlock::iterator I = UseIt, S = B.begin(); 746 if (I == S) 747 return 0; 748 749 bool PredValid = true; 750 do { 751 --I; 752 MachineInstr *MI = &*I; 753 // Check if this instruction can be ignored, i.e. if it is predicated 754 // on the complementary condition. 755 if (PredValid && HII->isPredicated(*MI)) { 756 if (MI->readsRegister(PredR) && (Cond != HII->isPredicatedTrue(*MI))) 757 continue; 758 } 759 760 // Check the defs. If the PredR is defined, invalidate it. If RD is 761 // defined, return the instruction or 0, depending on the circumstances. 762 for (auto &Op : MI->operands()) { 763 if (!Op.isReg() || !Op.isDef()) 764 continue; 765 RegisterRef RR = Op; 766 if (RR.Reg == PredR) { 767 PredValid = false; 768 continue; 769 } 770 if (RR.Reg != RD.Reg) 771 continue; 772 // If the "Reg" part agrees, there is still the subregister to check. 773 // If we are looking for vreg1:loreg, we can skip vreg1:hireg, but 774 // not vreg1 (w/o subregisters). 775 if (RR.Sub == RD.Sub) 776 return MI; 777 if (RR.Sub == 0 || RD.Sub == 0) 778 return 0; 779 // We have different subregisters, so we can continue looking. 780 } 781 } while (I != S); 782 783 return 0; 784 } 785 786 787 /// Check if the instruction MI can be safely moved over a set of instructions 788 /// whose side-effects (in terms of register defs and uses) are expressed in 789 /// the maps Defs and Uses. These maps reflect the conditional defs and uses 790 /// that depend on the same predicate register to allow moving instructions 791 /// over instructions predicated on the opposite condition. 792 bool HexagonExpandCondsets::canMoveOver(MachineInstr &MI, ReferenceMap &Defs, 793 ReferenceMap &Uses) { 794 // In order to be able to safely move MI over instructions that define 795 // "Defs" and use "Uses", no def operand from MI can be defined or used 796 // and no use operand can be defined. 797 for (auto &Op : MI.operands()) { 798 if (!Op.isReg()) 799 continue; 800 RegisterRef RR = Op; 801 // For physical register we would need to check register aliases, etc. 802 // and we don't want to bother with that. It would be of little value 803 // before the actual register rewriting (from virtual to physical). 804 if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) 805 return false; 806 // No redefs for any operand. 807 if (isRefInMap(RR, Defs, Exec_Then)) 808 return false; 809 // For defs, there cannot be uses. 810 if (Op.isDef() && isRefInMap(RR, Uses, Exec_Then)) 811 return false; 812 } 813 return true; 814 } 815 816 817 /// Check if the instruction accessing memory (TheI) can be moved to the 818 /// location ToI. 819 bool HexagonExpandCondsets::canMoveMemTo(MachineInstr &TheI, MachineInstr &ToI, 820 bool IsDown) { 821 bool IsLoad = TheI.mayLoad(), IsStore = TheI.mayStore(); 822 if (!IsLoad && !IsStore) 823 return true; 824 if (HII->areMemAccessesTriviallyDisjoint(TheI, ToI)) 825 return true; 826 if (TheI.hasUnmodeledSideEffects()) 827 return false; 828 829 MachineBasicBlock::iterator StartI = IsDown ? TheI : ToI; 830 MachineBasicBlock::iterator EndI = IsDown ? ToI : TheI; 831 bool Ordered = TheI.hasOrderedMemoryRef(); 832 833 // Search for aliased memory reference in (StartI, EndI). 834 for (MachineBasicBlock::iterator I = std::next(StartI); I != EndI; ++I) { 835 MachineInstr *MI = &*I; 836 if (MI->hasUnmodeledSideEffects()) 837 return false; 838 bool L = MI->mayLoad(), S = MI->mayStore(); 839 if (!L && !S) 840 continue; 841 if (Ordered && MI->hasOrderedMemoryRef()) 842 return false; 843 844 bool Conflict = (L && IsStore) || S; 845 if (Conflict) 846 return false; 847 } 848 return true; 849 } 850 851 852 /// Generate a predicated version of MI (where the condition is given via 853 /// PredR and Cond) at the point indicated by Where. 854 void HexagonExpandCondsets::predicateAt(const MachineOperand &DefOp, 855 MachineInstr &MI, 856 MachineBasicBlock::iterator Where, 857 const MachineOperand &PredOp, bool Cond, 858 std::set<unsigned> &UpdRegs) { 859 // The problem with updating live intervals is that we can move one def 860 // past another def. In particular, this can happen when moving an A2_tfrt 861 // over an A2_tfrf defining the same register. From the point of view of 862 // live intervals, these two instructions are two separate definitions, 863 // and each one starts another live segment. LiveIntervals's "handleMove" 864 // does not allow such moves, so we need to handle it ourselves. To avoid 865 // invalidating liveness data while we are using it, the move will be 866 // implemented in 4 steps: (1) add a clone of the instruction MI at the 867 // target location, (2) update liveness, (3) delete the old instruction, 868 // and (4) update liveness again. 869 870 MachineBasicBlock &B = *MI.getParent(); 871 DebugLoc DL = Where->getDebugLoc(); // "Where" points to an instruction. 872 unsigned Opc = MI.getOpcode(); 873 unsigned PredOpc = HII->getCondOpcode(Opc, !Cond); 874 MachineInstrBuilder MB = BuildMI(B, Where, DL, HII->get(PredOpc)); 875 unsigned Ox = 0, NP = MI.getNumOperands(); 876 // Skip all defs from MI first. 877 while (Ox < NP) { 878 MachineOperand &MO = MI.getOperand(Ox); 879 if (!MO.isReg() || !MO.isDef()) 880 break; 881 Ox++; 882 } 883 // Add the new def, then the predicate register, then the rest of the 884 // operands. 885 MB.addReg(DefOp.getReg(), getRegState(DefOp), DefOp.getSubReg()); 886 MB.addReg(PredOp.getReg(), PredOp.isUndef() ? RegState::Undef : 0, 887 PredOp.getSubReg()); 888 while (Ox < NP) { 889 MachineOperand &MO = MI.getOperand(Ox); 890 if (!MO.isReg() || !MO.isImplicit()) 891 MB.addOperand(MO); 892 Ox++; 893 } 894 895 MachineFunction &MF = *B.getParent(); 896 MachineInstr::mmo_iterator I = MI.memoperands_begin(); 897 unsigned NR = std::distance(I, MI.memoperands_end()); 898 MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(NR); 899 for (unsigned i = 0; i < NR; ++i) 900 MemRefs[i] = *I++; 901 MB.setMemRefs(MemRefs, MemRefs+NR); 902 903 MachineInstr *NewI = MB; 904 NewI->clearKillInfo(); 905 LIS->InsertMachineInstrInMaps(*NewI); 906 907 for (auto &Op : NewI->operands()) 908 if (Op.isReg()) 909 UpdRegs.insert(Op.getReg()); 910 } 911 912 913 /// In the range [First, Last], rename all references to the "old" register RO 914 /// to the "new" register RN, but only in instructions predicated on the given 915 /// condition. 916 void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN, 917 unsigned PredR, bool Cond, MachineBasicBlock::iterator First, 918 MachineBasicBlock::iterator Last) { 919 MachineBasicBlock::iterator End = std::next(Last); 920 for (MachineBasicBlock::iterator I = First; I != End; ++I) { 921 MachineInstr *MI = &*I; 922 // Do not touch instructions that are not predicated, or are predicated 923 // on the opposite condition. 924 if (!HII->isPredicated(*MI)) 925 continue; 926 if (!MI->readsRegister(PredR) || (Cond != HII->isPredicatedTrue(*MI))) 927 continue; 928 929 for (auto &Op : MI->operands()) { 930 if (!Op.isReg() || RO != RegisterRef(Op)) 931 continue; 932 Op.setReg(RN.Reg); 933 Op.setSubReg(RN.Sub); 934 // In practice, this isn't supposed to see any defs. 935 assert(!Op.isDef() && "Not expecting a def"); 936 } 937 } 938 } 939 940 941 /// For a given conditional copy, predicate the definition of the source of 942 /// the copy under the given condition (using the same predicate register as 943 /// the copy). 944 bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond, 945 std::set<unsigned> &UpdRegs) { 946 // TfrI - A2_tfr[tf] Instruction (not A2_tfrsi). 947 unsigned Opc = TfrI.getOpcode(); 948 (void)Opc; 949 assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf); 950 DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false") 951 << ": " << TfrI); 952 953 MachineOperand &MD = TfrI.getOperand(0); 954 MachineOperand &MP = TfrI.getOperand(1); 955 MachineOperand &MS = TfrI.getOperand(2); 956 // The source operand should be a <kill>. This is not strictly necessary, 957 // but it makes things a lot simpler. Otherwise, we would need to rename 958 // some registers, which would complicate the transformation considerably. 959 if (!MS.isKill()) 960 return false; 961 // Avoid predicating instructions that define a subregister if subregister 962 // liveness tracking is not enabled. 963 if (MD.getSubReg() && !MRI->shouldTrackSubRegLiveness(MD.getReg())) 964 return false; 965 966 RegisterRef RT(MS); 967 unsigned PredR = MP.getReg(); 968 MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond); 969 if (!DefI || !isPredicable(DefI)) 970 return false; 971 972 DEBUG(dbgs() << "Source def: " << *DefI); 973 974 // Collect the information about registers defined and used between the 975 // DefI and the TfrI. 976 // Map: reg -> bitmask of subregs 977 ReferenceMap Uses, Defs; 978 MachineBasicBlock::iterator DefIt = DefI, TfrIt = TfrI; 979 980 // Check if the predicate register is valid between DefI and TfrI. 981 // If it is, we can then ignore instructions predicated on the negated 982 // conditions when collecting def and use information. 983 bool PredValid = true; 984 for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) { 985 if (!I->modifiesRegister(PredR, 0)) 986 continue; 987 PredValid = false; 988 break; 989 } 990 991 for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) { 992 MachineInstr *MI = &*I; 993 // If this instruction is predicated on the same register, it could 994 // potentially be ignored. 995 // By default assume that the instruction executes on the same condition 996 // as TfrI (Exec_Then), and also on the opposite one (Exec_Else). 997 unsigned Exec = Exec_Then | Exec_Else; 998 if (PredValid && HII->isPredicated(*MI) && MI->readsRegister(PredR)) 999 Exec = (Cond == HII->isPredicatedTrue(*MI)) ? Exec_Then : Exec_Else; 1000 1001 for (auto &Op : MI->operands()) { 1002 if (!Op.isReg()) 1003 continue; 1004 // We don't want to deal with physical registers. The reason is that 1005 // they can be aliased with other physical registers. Aliased virtual 1006 // registers must share the same register number, and can only differ 1007 // in the subregisters, which we are keeping track of. Physical 1008 // registers ters no longer have subregisters---their super- and 1009 // subregisters are other physical registers, and we are not checking 1010 // that. 1011 RegisterRef RR = Op; 1012 if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) 1013 return false; 1014 1015 ReferenceMap &Map = Op.isDef() ? Defs : Uses; 1016 addRefToMap(RR, Map, Exec); 1017 } 1018 } 1019 1020 // The situation: 1021 // RT = DefI 1022 // ... 1023 // RD = TfrI ..., RT 1024 1025 // If the register-in-the-middle (RT) is used or redefined between 1026 // DefI and TfrI, we may not be able proceed with this transformation. 1027 // We can ignore a def that will not execute together with TfrI, and a 1028 // use that will. If there is such a use (that does execute together with 1029 // TfrI), we will not be able to move DefI down. If there is a use that 1030 // executed if TfrI's condition is false, then RT must be available 1031 // unconditionally (cannot be predicated). 1032 // Essentially, we need to be able to rename RT to RD in this segment. 1033 if (isRefInMap(RT, Defs, Exec_Then) || isRefInMap(RT, Uses, Exec_Else)) 1034 return false; 1035 RegisterRef RD = MD; 1036 // If the predicate register is defined between DefI and TfrI, the only 1037 // potential thing to do would be to move the DefI down to TfrI, and then 1038 // predicate. The reaching def (DefI) must be movable down to the location 1039 // of the TfrI. 1040 // If the target register of the TfrI (RD) is not used or defined between 1041 // DefI and TfrI, consider moving TfrI up to DefI. 1042 bool CanUp = canMoveOver(TfrI, Defs, Uses); 1043 bool CanDown = canMoveOver(*DefI, Defs, Uses); 1044 // The TfrI does not access memory, but DefI could. Check if it's safe 1045 // to move DefI down to TfrI. 1046 if (DefI->mayLoad() || DefI->mayStore()) 1047 if (!canMoveMemTo(*DefI, TfrI, true)) 1048 CanDown = false; 1049 1050 DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no") 1051 << ", can move down: " << (CanDown ? "yes\n" : "no\n")); 1052 MachineBasicBlock::iterator PastDefIt = std::next(DefIt); 1053 if (CanUp) 1054 predicateAt(MD, *DefI, PastDefIt, MP, Cond, UpdRegs); 1055 else if (CanDown) 1056 predicateAt(MD, *DefI, TfrIt, MP, Cond, UpdRegs); 1057 else 1058 return false; 1059 1060 if (RT != RD) { 1061 renameInRange(RT, RD, PredR, Cond, PastDefIt, TfrIt); 1062 UpdRegs.insert(RT.Reg); 1063 } 1064 1065 removeInstr(TfrI); 1066 removeInstr(*DefI); 1067 return true; 1068 } 1069 1070 1071 /// Predicate all cases of conditional copies in the specified block. 1072 bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B, 1073 std::set<unsigned> &UpdRegs) { 1074 bool Changed = false; 1075 MachineBasicBlock::iterator I, E, NextI; 1076 for (I = B.begin(), E = B.end(); I != E; I = NextI) { 1077 NextI = std::next(I); 1078 unsigned Opc = I->getOpcode(); 1079 if (Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf) { 1080 bool Done = predicate(*I, (Opc == Hexagon::A2_tfrt), UpdRegs); 1081 if (!Done) { 1082 // If we didn't predicate I, we may need to remove it in case it is 1083 // an "identity" copy, e.g. vreg1 = A2_tfrt vreg2, vreg1. 1084 if (RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2))) { 1085 for (auto &Op : I->operands()) 1086 if (Op.isReg()) 1087 UpdRegs.insert(Op.getReg()); 1088 removeInstr(*I); 1089 } 1090 } 1091 Changed |= Done; 1092 } 1093 } 1094 return Changed; 1095 } 1096 1097 1098 bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) { 1099 if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) 1100 return false; 1101 const TargetRegisterClass *RC = MRI->getRegClass(RR.Reg); 1102 if (RC == &Hexagon::IntRegsRegClass) { 1103 BW = 32; 1104 return true; 1105 } 1106 if (RC == &Hexagon::DoubleRegsRegClass) { 1107 BW = (RR.Sub != 0) ? 32 : 64; 1108 return true; 1109 } 1110 return false; 1111 } 1112 1113 1114 bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) { 1115 for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { 1116 LiveRange::Segment &LR = *I; 1117 // Range must start at a register... 1118 if (!LR.start.isRegister()) 1119 return false; 1120 // ...and end in a register or in a dead slot. 1121 if (!LR.end.isRegister() && !LR.end.isDead()) 1122 return false; 1123 } 1124 return true; 1125 } 1126 1127 1128 bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) { 1129 if (CoaLimitActive) { 1130 if (CoaCounter >= CoaLimit) 1131 return false; 1132 CoaCounter++; 1133 } 1134 unsigned BW1, BW2; 1135 if (!isIntReg(R1, BW1) || !isIntReg(R2, BW2) || BW1 != BW2) 1136 return false; 1137 if (MRI->isLiveIn(R1.Reg)) 1138 return false; 1139 if (MRI->isLiveIn(R2.Reg)) 1140 return false; 1141 1142 LiveInterval &L1 = LIS->getInterval(R1.Reg); 1143 LiveInterval &L2 = LIS->getInterval(R2.Reg); 1144 bool Overlap = L1.overlaps(L2); 1145 1146 DEBUG(dbgs() << "compatible registers: (" 1147 << (Overlap ? "overlap" : "disjoint") << ")\n " 1148 << PrintReg(R1.Reg, TRI, R1.Sub) << " " << L1 << "\n " 1149 << PrintReg(R2.Reg, TRI, R2.Sub) << " " << L2 << "\n"); 1150 if (R1.Sub || R2.Sub) 1151 return false; 1152 if (Overlap) 1153 return false; 1154 1155 // Coalescing could have a negative impact on scheduling, so try to limit 1156 // to some reasonable extent. Only consider coalescing segments, when one 1157 // of them does not cross basic block boundaries. 1158 if (!isIntraBlocks(L1) && !isIntraBlocks(L2)) 1159 return false; 1160 1161 MRI->replaceRegWith(R2.Reg, R1.Reg); 1162 1163 // Move all live segments from L2 to L1. 1164 typedef DenseMap<VNInfo*,VNInfo*> ValueInfoMap; 1165 ValueInfoMap VM; 1166 for (LiveInterval::iterator I = L2.begin(), E = L2.end(); I != E; ++I) { 1167 VNInfo *NewVN, *OldVN = I->valno; 1168 ValueInfoMap::iterator F = VM.find(OldVN); 1169 if (F == VM.end()) { 1170 NewVN = L1.getNextValue(I->valno->def, LIS->getVNInfoAllocator()); 1171 VM.insert(std::make_pair(OldVN, NewVN)); 1172 } else { 1173 NewVN = F->second; 1174 } 1175 L1.addSegment(LiveRange::Segment(I->start, I->end, NewVN)); 1176 } 1177 while (L2.begin() != L2.end()) 1178 L2.removeSegment(*L2.begin()); 1179 1180 updateKillFlags(R1.Reg); 1181 DEBUG(dbgs() << "coalesced: " << L1 << "\n"); 1182 L1.verify(); 1183 1184 return true; 1185 } 1186 1187 1188 /// Attempt to coalesce one of the source registers to a MUX intruction with 1189 /// the destination register. This could lead to having only one predicated 1190 /// instruction in the end instead of two. 1191 bool HexagonExpandCondsets::coalesceSegments(MachineFunction &MF) { 1192 SmallVector<MachineInstr*,16> Condsets; 1193 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { 1194 MachineBasicBlock &B = *I; 1195 for (MachineBasicBlock::iterator J = B.begin(), F = B.end(); J != F; ++J) { 1196 MachineInstr *MI = &*J; 1197 if (!isCondset(*MI)) 1198 continue; 1199 MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3); 1200 if (!S1.isReg() && !S2.isReg()) 1201 continue; 1202 Condsets.push_back(MI); 1203 } 1204 } 1205 1206 bool Changed = false; 1207 for (unsigned i = 0, n = Condsets.size(); i < n; ++i) { 1208 MachineInstr *CI = Condsets[i]; 1209 RegisterRef RD = CI->getOperand(0); 1210 RegisterRef RP = CI->getOperand(1); 1211 MachineOperand &S1 = CI->getOperand(2), &S2 = CI->getOperand(3); 1212 bool Done = false; 1213 // Consider this case: 1214 // vreg1 = instr1 ... 1215 // vreg2 = instr2 ... 1216 // vreg0 = C2_mux ..., vreg1, vreg2 1217 // If vreg0 was coalesced with vreg1, we could end up with the following 1218 // code: 1219 // vreg0 = instr1 ... 1220 // vreg2 = instr2 ... 1221 // vreg0 = A2_tfrf ..., vreg2 1222 // which will later become: 1223 // vreg0 = instr1 ... 1224 // vreg0 = instr2_cNotPt ... 1225 // i.e. there will be an unconditional definition (instr1) of vreg0 1226 // followed by a conditional one. The output dependency was there before 1227 // and it unavoidable, but if instr1 is predicable, we will no longer be 1228 // able to predicate it here. 1229 // To avoid this scenario, don't coalesce the destination register with 1230 // a source register that is defined by a predicable instruction. 1231 if (S1.isReg()) { 1232 RegisterRef RS = S1; 1233 MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, true); 1234 if (!RDef || !HII->isPredicable(*RDef)) 1235 Done = coalesceRegisters(RD, RegisterRef(S1)); 1236 } 1237 if (!Done && S2.isReg()) { 1238 RegisterRef RS = S2; 1239 MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, false); 1240 if (!RDef || !HII->isPredicable(*RDef)) 1241 Done = coalesceRegisters(RD, RegisterRef(S2)); 1242 } 1243 Changed |= Done; 1244 } 1245 return Changed; 1246 } 1247 1248 1249 bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { 1250 if (skipFunction(*MF.getFunction())) 1251 return false; 1252 1253 HII = static_cast<const HexagonInstrInfo*>(MF.getSubtarget().getInstrInfo()); 1254 TRI = MF.getSubtarget().getRegisterInfo(); 1255 MDT = &getAnalysis<MachineDominatorTree>(); 1256 LIS = &getAnalysis<LiveIntervals>(); 1257 MRI = &MF.getRegInfo(); 1258 LocalImpDefs.clear(); 1259 1260 DEBUG(LIS->print(dbgs() << "Before expand-condsets\n", 1261 MF.getFunction()->getParent())); 1262 1263 bool Changed = false; 1264 std::set<unsigned> SplitUpd, PredUpd; 1265 1266 // Try to coalesce the target of a mux with one of its sources. 1267 // This could eliminate a register copy in some circumstances. 1268 Changed |= coalesceSegments(MF); 1269 1270 // First, simply split all muxes into a pair of conditional transfers 1271 // and update the live intervals to reflect the new arrangement. The 1272 // goal is to update the kill flags, since predication will rely on 1273 // them. 1274 for (auto &B : MF) 1275 Changed |= splitInBlock(B, SplitUpd); 1276 updateLiveness(SplitUpd, true, true, false); 1277 1278 // Traverse all blocks and collapse predicable instructions feeding 1279 // conditional transfers into predicated instructions. 1280 // Walk over all the instructions again, so we may catch pre-existing 1281 // cases that were not created in the previous step. 1282 for (auto &B : MF) 1283 Changed |= predicateInBlock(B, PredUpd); 1284 1285 updateLiveness(PredUpd, true, true, true); 1286 // Remove from SplitUpd all registers contained in PredUpd to avoid 1287 // unnecessary liveness recalculation. 1288 std::set<unsigned> Diff; 1289 std::set_difference(SplitUpd.begin(), SplitUpd.end(), 1290 PredUpd.begin(), PredUpd.end(), 1291 std::inserter(Diff, Diff.begin())); 1292 updateLiveness(Diff, false, false, true); 1293 1294 for (auto *ImpD : LocalImpDefs) 1295 removeInstr(*ImpD); 1296 1297 DEBUG({ 1298 if (Changed) 1299 LIS->print(dbgs() << "After expand-condsets\n", 1300 MF.getFunction()->getParent()); 1301 }); 1302 1303 return Changed; 1304 } 1305 1306 1307 //===----------------------------------------------------------------------===// 1308 // Public Constructor Functions 1309 //===----------------------------------------------------------------------===// 1310 1311 FunctionPass *llvm::createHexagonExpandCondsets() { 1312 return new HexagonExpandCondsets(); 1313 } 1314