1 //===--------------- PPCVSXFMAMutate.cpp - VSX FMA Mutation ---------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass mutates the form of VSX FMA instructions to avoid unnecessary 11 // copies. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "PPCInstrInfo.h" 16 #include "MCTargetDesc/PPCPredicates.h" 17 #include "PPC.h" 18 #include "PPCInstrBuilder.h" 19 #include "PPCMachineFunctionInfo.h" 20 #include "PPCTargetMachine.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/ADT/Statistic.h" 23 #include "llvm/CodeGen/LiveIntervalAnalysis.h" 24 #include "llvm/CodeGen/MachineFrameInfo.h" 25 #include "llvm/CodeGen/MachineFunctionPass.h" 26 #include "llvm/CodeGen/MachineInstrBuilder.h" 27 #include "llvm/CodeGen/MachineMemOperand.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/CodeGen/PseudoSourceValue.h" 30 #include "llvm/CodeGen/ScheduleDAG.h" 31 #include "llvm/CodeGen/SlotIndexes.h" 32 #include "llvm/MC/MCAsmInfo.h" 33 #include "llvm/Support/CommandLine.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/ErrorHandling.h" 36 #include "llvm/Support/TargetRegistry.h" 37 #include "llvm/Support/raw_ostream.h" 38 39 using namespace llvm; 40 41 // Temporarily disable FMA mutation by default, since it doesn't handle 42 // cross-basic-block intervals well. 43 // See: http://lists.llvm.org/pipermail/llvm-dev/2016-February/095669.html 44 // http://reviews.llvm.org/D17087 45 static cl::opt<bool> DisableVSXFMAMutate( 46 "disable-ppc-vsx-fma-mutation", 47 cl::desc("Disable VSX FMA instruction mutation"), cl::init(true), 48 cl::Hidden); 49 50 #define DEBUG_TYPE "ppc-vsx-fma-mutate" 51 52 namespace llvm { namespace PPC { 53 int getAltVSXFMAOpcode(uint16_t Opcode); 54 } } 55 56 namespace { 57 // PPCVSXFMAMutate pass - For copies between VSX registers and non-VSX registers 58 // (Altivec and scalar floating-point registers), we need to transform the 59 // copies into subregister copies with other restrictions. 60 struct PPCVSXFMAMutate : public MachineFunctionPass { 61 static char ID; 62 PPCVSXFMAMutate() : MachineFunctionPass(ID) { 63 initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry()); 64 } 65 66 LiveIntervals *LIS; 67 const PPCInstrInfo *TII; 68 69 protected: 70 bool processBlock(MachineBasicBlock &MBB) { 71 bool Changed = false; 72 73 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 74 const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); 75 for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); 76 I != IE; ++I) { 77 MachineInstr *MI = I; 78 79 // The default (A-type) VSX FMA form kills the addend (it is taken from 80 // the target register, which is then updated to reflect the result of 81 // the FMA). If the instruction, however, kills one of the registers 82 // used for the product, then we can use the M-form instruction (which 83 // will take that value from the to-be-defined register). 84 85 int AltOpc = PPC::getAltVSXFMAOpcode(MI->getOpcode()); 86 if (AltOpc == -1) 87 continue; 88 89 // This pass is run after register coalescing, and so we're looking for 90 // a situation like this: 91 // ... 92 // %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9 93 // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16, 94 // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16 95 // ... 96 // %vreg9<def,tied1> = XSMADDADP %vreg9<tied0>, %vreg17, %vreg19, 97 // %RM<imp-use>; VSLRC:%vreg9,%vreg17,%vreg19 98 // ... 99 // Where we can eliminate the copy by changing from the A-type to the 100 // M-type instruction. Specifically, for this example, this means: 101 // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16, 102 // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16 103 // is replaced by: 104 // %vreg16<def,tied1> = XSMADDMDP %vreg16<tied0>, %vreg18, %vreg9, 105 // %RM<imp-use>; VSLRC:%vreg16,%vreg18,%vreg9 106 // and we remove: %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9 107 108 SlotIndex FMAIdx = LIS->getInstructionIndex(*MI); 109 110 VNInfo *AddendValNo = 111 LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn(); 112 113 // This can be null if the register is undef. 114 if (!AddendValNo) 115 continue; 116 117 MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def); 118 119 // The addend and this instruction must be in the same block. 120 121 if (!AddendMI || AddendMI->getParent() != MI->getParent()) 122 continue; 123 124 // The addend must be a full copy within the same register class. 125 126 if (!AddendMI->isFullCopy()) 127 continue; 128 129 unsigned AddendSrcReg = AddendMI->getOperand(1).getReg(); 130 if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) { 131 if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) != 132 MRI.getRegClass(AddendSrcReg)) 133 continue; 134 } else { 135 // If AddendSrcReg is a physical register, make sure the destination 136 // register class contains it. 137 if (!MRI.getRegClass(AddendMI->getOperand(0).getReg()) 138 ->contains(AddendSrcReg)) 139 continue; 140 } 141 142 // In theory, there could be other uses of the addend copy before this 143 // fma. We could deal with this, but that would require additional 144 // logic below and I suspect it will not occur in any relevant 145 // situations. Additionally, check whether the copy source is killed 146 // prior to the fma. In order to replace the addend here with the 147 // source of the copy, it must still be live here. We can't use 148 // interval testing for a physical register, so as long as we're 149 // walking the MIs we may as well test liveness here. 150 // 151 // FIXME: There is a case that occurs in practice, like this: 152 // %vreg9<def> = COPY %F1; VSSRC:%vreg9 153 // ... 154 // %vreg6<def> = COPY %vreg9; VSSRC:%vreg6,%vreg9 155 // %vreg7<def> = COPY %vreg9; VSSRC:%vreg7,%vreg9 156 // %vreg9<def,tied1> = XSMADDASP %vreg9<tied0>, %vreg1, %vreg4; VSSRC: 157 // %vreg6<def,tied1> = XSMADDASP %vreg6<tied0>, %vreg1, %vreg2; VSSRC: 158 // %vreg7<def,tied1> = XSMADDASP %vreg7<tied0>, %vreg1, %vreg3; VSSRC: 159 // which prevents an otherwise-profitable transformation. 160 bool OtherUsers = false, KillsAddendSrc = false; 161 for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI); 162 J != JE; --J) { 163 if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) { 164 OtherUsers = true; 165 break; 166 } 167 if (J->modifiesRegister(AddendSrcReg, TRI) || 168 J->killsRegister(AddendSrcReg, TRI)) { 169 KillsAddendSrc = true; 170 break; 171 } 172 } 173 174 if (OtherUsers || KillsAddendSrc) 175 continue; 176 177 178 // The transformation doesn't work well with things like: 179 // %vreg5 = A-form-op %vreg5, %vreg11, %vreg5; 180 // unless vreg11 is also a kill, so skip when it is not, 181 // and check operand 3 to see it is also a kill to handle the case: 182 // %vreg5 = A-form-op %vreg5, %vreg5, %vreg11; 183 // where vreg5 and vreg11 are both kills. This case would be skipped 184 // otherwise. 185 unsigned OldFMAReg = MI->getOperand(0).getReg(); 186 187 // Find one of the product operands that is killed by this instruction. 188 unsigned KilledProdOp = 0, OtherProdOp = 0; 189 unsigned Reg2 = MI->getOperand(2).getReg(); 190 unsigned Reg3 = MI->getOperand(3).getReg(); 191 if (LIS->getInterval(Reg2).Query(FMAIdx).isKill() 192 && Reg2 != OldFMAReg) { 193 KilledProdOp = 2; 194 OtherProdOp = 3; 195 } else if (LIS->getInterval(Reg3).Query(FMAIdx).isKill() 196 && Reg3 != OldFMAReg) { 197 KilledProdOp = 3; 198 OtherProdOp = 2; 199 } 200 201 // If there are no usable killed product operands, then this 202 // transformation is likely not profitable. 203 if (!KilledProdOp) 204 continue; 205 206 // If the addend copy is used only by this MI, then the addend source 207 // register is likely not live here. This could be fixed (based on the 208 // legality checks above, the live range for the addend source register 209 // could be extended), but it seems likely that such a trivial copy can 210 // be coalesced away later, and thus is not worth the effort. 211 if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) && 212 !LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) 213 continue; 214 215 // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3. 216 217 unsigned KilledProdReg = MI->getOperand(KilledProdOp).getReg(); 218 unsigned OtherProdReg = MI->getOperand(OtherProdOp).getReg(); 219 220 unsigned AddSubReg = AddendMI->getOperand(1).getSubReg(); 221 unsigned KilledProdSubReg = MI->getOperand(KilledProdOp).getSubReg(); 222 unsigned OtherProdSubReg = MI->getOperand(OtherProdOp).getSubReg(); 223 224 bool AddRegKill = AddendMI->getOperand(1).isKill(); 225 bool KilledProdRegKill = MI->getOperand(KilledProdOp).isKill(); 226 bool OtherProdRegKill = MI->getOperand(OtherProdOp).isKill(); 227 228 bool AddRegUndef = AddendMI->getOperand(1).isUndef(); 229 bool KilledProdRegUndef = MI->getOperand(KilledProdOp).isUndef(); 230 bool OtherProdRegUndef = MI->getOperand(OtherProdOp).isUndef(); 231 232 // If there isn't a class that fits, we can't perform the transform. 233 // This is needed for correctness with a mixture of VSX and Altivec 234 // instructions to make sure that a low VSX register is not assigned to 235 // the Altivec instruction. 236 if (!MRI.constrainRegClass(KilledProdReg, 237 MRI.getRegClass(OldFMAReg))) 238 continue; 239 240 assert(OldFMAReg == AddendMI->getOperand(0).getReg() && 241 "Addend copy not tied to old FMA output!"); 242 243 DEBUG(dbgs() << "VSX FMA Mutation:\n " << *MI;); 244 245 MI->getOperand(0).setReg(KilledProdReg); 246 MI->getOperand(1).setReg(KilledProdReg); 247 MI->getOperand(3).setReg(AddendSrcReg); 248 249 MI->getOperand(0).setSubReg(KilledProdSubReg); 250 MI->getOperand(1).setSubReg(KilledProdSubReg); 251 MI->getOperand(3).setSubReg(AddSubReg); 252 253 MI->getOperand(1).setIsKill(KilledProdRegKill); 254 MI->getOperand(3).setIsKill(AddRegKill); 255 256 MI->getOperand(1).setIsUndef(KilledProdRegUndef); 257 MI->getOperand(3).setIsUndef(AddRegUndef); 258 259 MI->setDesc(TII->get(AltOpc)); 260 261 // If the addend is also a multiplicand, replace it with the addend 262 // source in both places. 263 if (OtherProdReg == AddendMI->getOperand(0).getReg()) { 264 MI->getOperand(2).setReg(AddendSrcReg); 265 MI->getOperand(2).setSubReg(AddSubReg); 266 MI->getOperand(2).setIsKill(AddRegKill); 267 MI->getOperand(2).setIsUndef(AddRegUndef); 268 } else { 269 MI->getOperand(2).setReg(OtherProdReg); 270 MI->getOperand(2).setSubReg(OtherProdSubReg); 271 MI->getOperand(2).setIsKill(OtherProdRegKill); 272 MI->getOperand(2).setIsUndef(OtherProdRegUndef); 273 } 274 275 DEBUG(dbgs() << " -> " << *MI); 276 277 // The killed product operand was killed here, so we can reuse it now 278 // for the result of the fma. 279 280 LiveInterval &FMAInt = LIS->getInterval(OldFMAReg); 281 VNInfo *FMAValNo = FMAInt.getVNInfoAt(FMAIdx.getRegSlot()); 282 for (auto UI = MRI.reg_nodbg_begin(OldFMAReg), UE = MRI.reg_nodbg_end(); 283 UI != UE;) { 284 MachineOperand &UseMO = *UI; 285 MachineInstr *UseMI = UseMO.getParent(); 286 ++UI; 287 288 // Don't replace the result register of the copy we're about to erase. 289 if (UseMI == AddendMI) 290 continue; 291 292 UseMO.substVirtReg(KilledProdReg, KilledProdSubReg, *TRI); 293 } 294 295 // Extend the live intervals of the killed product operand to hold the 296 // fma result. 297 298 LiveInterval &NewFMAInt = LIS->getInterval(KilledProdReg); 299 for (LiveInterval::iterator AI = FMAInt.begin(), AE = FMAInt.end(); 300 AI != AE; ++AI) { 301 // Don't add the segment that corresponds to the original copy. 302 if (AI->valno == AddendValNo) 303 continue; 304 305 VNInfo *NewFMAValNo = 306 NewFMAInt.getNextValue(AI->start, 307 LIS->getVNInfoAllocator()); 308 309 NewFMAInt.addSegment(LiveInterval::Segment(AI->start, AI->end, 310 NewFMAValNo)); 311 } 312 DEBUG(dbgs() << " extended: " << NewFMAInt << '\n'); 313 314 // Extend the live interval of the addend source (it might end at the 315 // copy to be removed, or somewhere in between there and here). This 316 // is necessary only if it is a physical register. 317 if (!TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) 318 for (MCRegUnitIterator Units(AddendSrcReg, TRI); Units.isValid(); 319 ++Units) { 320 unsigned Unit = *Units; 321 322 LiveRange &AddendSrcRange = LIS->getRegUnit(Unit); 323 AddendSrcRange.extendInBlock(LIS->getMBBStartIdx(&MBB), 324 FMAIdx.getRegSlot()); 325 DEBUG(dbgs() << " extended: " << AddendSrcRange << '\n'); 326 } 327 328 FMAInt.removeValNo(FMAValNo); 329 DEBUG(dbgs() << " trimmed: " << FMAInt << '\n'); 330 331 // Remove the (now unused) copy. 332 333 DEBUG(dbgs() << " removing: " << *AddendMI << '\n'); 334 LIS->RemoveMachineInstrFromMaps(*AddendMI); 335 AddendMI->eraseFromParent(); 336 337 Changed = true; 338 } 339 340 return Changed; 341 } 342 343 public: 344 bool runOnMachineFunction(MachineFunction &MF) override { 345 if (skipFunction(*MF.getFunction())) 346 return false; 347 348 // If we don't have VSX then go ahead and return without doing 349 // anything. 350 const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>(); 351 if (!STI.hasVSX()) 352 return false; 353 354 LIS = &getAnalysis<LiveIntervals>(); 355 356 TII = STI.getInstrInfo(); 357 358 bool Changed = false; 359 360 if (DisableVSXFMAMutate) 361 return Changed; 362 363 for (MachineFunction::iterator I = MF.begin(); I != MF.end();) { 364 MachineBasicBlock &B = *I++; 365 if (processBlock(B)) 366 Changed = true; 367 } 368 369 return Changed; 370 } 371 372 void getAnalysisUsage(AnalysisUsage &AU) const override { 373 AU.addRequired<LiveIntervals>(); 374 AU.addPreserved<LiveIntervals>(); 375 AU.addRequired<SlotIndexes>(); 376 AU.addPreserved<SlotIndexes>(); 377 MachineFunctionPass::getAnalysisUsage(AU); 378 } 379 }; 380 } 381 382 INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE, 383 "PowerPC VSX FMA Mutation", false, false) 384 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 385 INITIALIZE_PASS_DEPENDENCY(SlotIndexes) 386 INITIALIZE_PASS_END(PPCVSXFMAMutate, DEBUG_TYPE, 387 "PowerPC VSX FMA Mutation", false, false) 388 389 char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID; 390 391 char PPCVSXFMAMutate::ID = 0; 392 FunctionPass *llvm::createPPCVSXFMAMutatePass() { 393 return new PPCVSXFMAMutate(); 394 } 395