//===--------------------- R600MergeVectorRegisters.cpp -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass merges the inputs of swizzlable instructions into vectors that
/// share common data and/or have enough undef subregs, using the swizzle
/// abilities of those instructions.
///
/// For instance, let's consider the following pseudo code:
/// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
/// ...
/// vreg7<def> = REG_SEQ vreg1, sub0, vreg3, sub1, undef, sub2, vreg4, sub3
/// (swizzlable Inst) vreg7, SwizzleMask : sub0, sub1, sub2, sub3
///
/// is turned into:
/// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
/// ...
/// vreg7<def> = INSERT_SUBREG vreg4, sub3
/// (swizzlable Inst) vreg7, SwizzleMask : sub0, sub2, sub1, sub3
///
/// This allows regalloc to reduce register pressure for vector registers and
/// to reduce MOV count.
28 //===----------------------------------------------------------------------===// 29 30 #include "llvm/Support/Debug.h" 31 #include "AMDGPU.h" 32 #include "R600InstrInfo.h" 33 #include "llvm/CodeGen/DFAPacketizer.h" 34 #include "llvm/CodeGen/MachineDominators.h" 35 #include "llvm/CodeGen/MachineFunctionPass.h" 36 #include "llvm/CodeGen/MachineInstrBuilder.h" 37 #include "llvm/CodeGen/MachineLoopInfo.h" 38 #include "llvm/CodeGen/MachineRegisterInfo.h" 39 #include "llvm/CodeGen/Passes.h" 40 #include "llvm/Support/raw_ostream.h" 41 42 using namespace llvm; 43 44 #define DEBUG_TYPE "vec-merger" 45 46 namespace { 47 48 static bool 49 isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) { 50 for (MachineRegisterInfo::def_instr_iterator It = MRI.def_instr_begin(Reg), 51 E = MRI.def_instr_end(); It != E; ++It) { 52 return (*It).isImplicitDef(); 53 } 54 if (MRI.isReserved(Reg)) { 55 return false; 56 } 57 llvm_unreachable("Reg without a def"); 58 return false; 59 } 60 61 class RegSeqInfo { 62 public: 63 MachineInstr *Instr; 64 DenseMap<unsigned, unsigned> RegToChan; 65 std::vector<unsigned> UndefReg; 66 RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) { 67 assert(MI->getOpcode() == AMDGPU::REG_SEQUENCE); 68 for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) { 69 MachineOperand &MO = Instr->getOperand(i); 70 unsigned Chan = Instr->getOperand(i + 1).getImm(); 71 if (isImplicitlyDef(MRI, MO.getReg())) 72 UndefReg.push_back(Chan); 73 else 74 RegToChan[MO.getReg()] = Chan; 75 } 76 } 77 RegSeqInfo() {} 78 79 bool operator==(const RegSeqInfo &RSI) const { 80 return RSI.Instr == Instr; 81 } 82 }; 83 84 class R600VectorRegMerger : public MachineFunctionPass { 85 private: 86 MachineRegisterInfo *MRI; 87 const R600InstrInfo *TII; 88 bool canSwizzle(const MachineInstr &) const; 89 bool areAllUsesSwizzeable(unsigned Reg) const; 90 void SwizzleInput(MachineInstr &, 91 const std::vector<std::pair<unsigned, unsigned> > &) const; 92 bool 
tryMergeVector(const RegSeqInfo *, RegSeqInfo *, 93 std::vector<std::pair<unsigned, unsigned> > &Remap) const; 94 bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, 95 std::vector<std::pair<unsigned, unsigned> > &RemapChan); 96 bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, 97 std::vector<std::pair<unsigned, unsigned> > &RemapChan); 98 MachineInstr *RebuildVector(RegSeqInfo *MI, 99 const RegSeqInfo *BaseVec, 100 const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const; 101 void RemoveMI(MachineInstr *); 102 void trackRSI(const RegSeqInfo &RSI); 103 104 typedef DenseMap<unsigned, std::vector<MachineInstr *> > InstructionSetMap; 105 DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq; 106 InstructionSetMap PreviousRegSeqByReg; 107 InstructionSetMap PreviousRegSeqByUndefCount; 108 public: 109 static char ID; 110 R600VectorRegMerger(TargetMachine &tm) : MachineFunctionPass(ID), 111 TII(nullptr) { } 112 113 void getAnalysisUsage(AnalysisUsage &AU) const override { 114 AU.setPreservesCFG(); 115 AU.addRequired<MachineDominatorTree>(); 116 AU.addPreserved<MachineDominatorTree>(); 117 AU.addRequired<MachineLoopInfo>(); 118 AU.addPreserved<MachineLoopInfo>(); 119 MachineFunctionPass::getAnalysisUsage(AU); 120 } 121 122 const char *getPassName() const override { 123 return "R600 Vector Registers Merge Pass"; 124 } 125 126 bool runOnMachineFunction(MachineFunction &Fn) override; 127 }; 128 129 char R600VectorRegMerger::ID = 0; 130 131 bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI) 132 const { 133 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 134 return true; 135 switch (MI.getOpcode()) { 136 case AMDGPU::R600_ExportSwz: 137 case AMDGPU::EG_ExportSwz: 138 return true; 139 default: 140 return false; 141 } 142 } 143 144 bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched, 145 RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned> > &Remap) 146 const { 147 unsigned 
CurrentUndexIdx = 0; 148 for (DenseMap<unsigned, unsigned>::iterator It = ToMerge->RegToChan.begin(), 149 E = ToMerge->RegToChan.end(); It != E; ++It) { 150 DenseMap<unsigned, unsigned>::const_iterator PosInUntouched = 151 Untouched->RegToChan.find((*It).first); 152 if (PosInUntouched != Untouched->RegToChan.end()) { 153 Remap.push_back(std::pair<unsigned, unsigned> 154 ((*It).second, (*PosInUntouched).second)); 155 continue; 156 } 157 if (CurrentUndexIdx >= Untouched->UndefReg.size()) 158 return false; 159 Remap.push_back(std::pair<unsigned, unsigned> 160 ((*It).second, Untouched->UndefReg[CurrentUndexIdx++])); 161 } 162 163 return true; 164 } 165 166 static 167 unsigned getReassignedChan( 168 const std::vector<std::pair<unsigned, unsigned> > &RemapChan, 169 unsigned Chan) { 170 for (unsigned j = 0, je = RemapChan.size(); j < je; j++) { 171 if (RemapChan[j].first == Chan) 172 return RemapChan[j].second; 173 } 174 llvm_unreachable("Chan wasn't reassigned"); 175 } 176 177 MachineInstr *R600VectorRegMerger::RebuildVector( 178 RegSeqInfo *RSI, const RegSeqInfo *BaseRSI, 179 const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const { 180 unsigned Reg = RSI->Instr->getOperand(0).getReg(); 181 MachineBasicBlock::iterator Pos = RSI->Instr; 182 MachineBasicBlock &MBB = *Pos->getParent(); 183 DebugLoc DL = Pos->getDebugLoc(); 184 185 unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg(); 186 DenseMap<unsigned, unsigned> UpdatedRegToChan = BaseRSI->RegToChan; 187 std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg; 188 for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(), 189 E = RSI->RegToChan.end(); It != E; ++It) { 190 unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass); 191 unsigned SubReg = (*It).first; 192 unsigned Swizzle = (*It).second; 193 unsigned Chan = getReassignedChan(RemapChan, Swizzle); 194 195 MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::INSERT_SUBREG), 196 DstReg) 197 .addReg(SrcVec) 
198 .addReg(SubReg) 199 .addImm(Chan); 200 UpdatedRegToChan[SubReg] = Chan; 201 std::vector<unsigned>::iterator ChanPos = 202 std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan); 203 if (ChanPos != UpdatedUndef.end()) 204 UpdatedUndef.erase(ChanPos); 205 assert(std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan) == 206 UpdatedUndef.end() && 207 "UpdatedUndef shouldn't contain Chan more than once!"); 208 DEBUG(dbgs() << " ->"; Tmp->dump();); 209 (void)Tmp; 210 SrcVec = DstReg; 211 } 212 Pos = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg) 213 .addReg(SrcVec); 214 DEBUG(dbgs() << " ->"; Pos->dump();); 215 216 DEBUG(dbgs() << " Updating Swizzle:\n"); 217 for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg), 218 E = MRI->use_instr_end(); It != E; ++It) { 219 DEBUG(dbgs() << " ";(*It).dump(); dbgs() << " ->"); 220 SwizzleInput(*It, RemapChan); 221 DEBUG((*It).dump()); 222 } 223 RSI->Instr->eraseFromParent(); 224 225 // Update RSI 226 RSI->Instr = Pos; 227 RSI->RegToChan = UpdatedRegToChan; 228 RSI->UndefReg = UpdatedUndef; 229 230 return Pos; 231 } 232 233 void R600VectorRegMerger::RemoveMI(MachineInstr *MI) { 234 for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(), 235 E = PreviousRegSeqByReg.end(); It != E; ++It) { 236 std::vector<MachineInstr *> &MIs = (*It).second; 237 MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end()); 238 } 239 for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(), 240 E = PreviousRegSeqByUndefCount.end(); It != E; ++It) { 241 std::vector<MachineInstr *> &MIs = (*It).second; 242 MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end()); 243 } 244 } 245 246 void R600VectorRegMerger::SwizzleInput(MachineInstr &MI, 247 const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const { 248 unsigned Offset; 249 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 250 Offset = 2; 251 else 252 Offset = 3; 253 for (unsigned i = 0; i < 4; i++) { 254 unsigned 
Swizzle = MI.getOperand(i + Offset).getImm() + 1; 255 for (unsigned j = 0, e = RemapChan.size(); j < e; j++) { 256 if (RemapChan[j].first == Swizzle) { 257 MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1); 258 break; 259 } 260 } 261 } 262 } 263 264 bool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const { 265 for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg), 266 E = MRI->use_instr_end(); It != E; ++It) { 267 if (!canSwizzle(*It)) 268 return false; 269 } 270 return true; 271 } 272 273 bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI, 274 RegSeqInfo &CompatibleRSI, 275 std::vector<std::pair<unsigned, unsigned> > &RemapChan) { 276 for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(), 277 MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) { 278 if (!MOp->isReg()) 279 continue; 280 if (PreviousRegSeqByReg[MOp->getReg()].empty()) 281 continue; 282 std::vector<MachineInstr *> MIs = PreviousRegSeqByReg[MOp->getReg()]; 283 for (unsigned i = 0, e = MIs.size(); i < e; i++) { 284 CompatibleRSI = PreviousRegSeq[MIs[i]]; 285 if (RSI == CompatibleRSI) 286 continue; 287 if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan)) 288 return true; 289 } 290 } 291 return false; 292 } 293 294 bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI, 295 RegSeqInfo &CompatibleRSI, 296 std::vector<std::pair<unsigned, unsigned> > &RemapChan) { 297 unsigned NeededUndefs = 4 - RSI.UndefReg.size(); 298 if (PreviousRegSeqByUndefCount[NeededUndefs].empty()) 299 return false; 300 std::vector<MachineInstr *> &MIs = 301 PreviousRegSeqByUndefCount[NeededUndefs]; 302 CompatibleRSI = PreviousRegSeq[MIs.back()]; 303 tryMergeVector(&CompatibleRSI, &RSI, RemapChan); 304 return true; 305 } 306 307 void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) { 308 for (DenseMap<unsigned, unsigned>::const_iterator 309 It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) { 310 
PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr); 311 } 312 PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr); 313 PreviousRegSeq[RSI.Instr] = RSI; 314 } 315 316 bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { 317 TII = static_cast<const R600InstrInfo *>(Fn.getTarget().getInstrInfo()); 318 MRI = &(Fn.getRegInfo()); 319 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 320 MBB != MBBe; ++MBB) { 321 MachineBasicBlock *MB = MBB; 322 PreviousRegSeq.clear(); 323 PreviousRegSeqByReg.clear(); 324 PreviousRegSeqByUndefCount.clear(); 325 326 for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end(); 327 MII != MIIE; ++MII) { 328 MachineInstr *MI = MII; 329 if (MI->getOpcode() != AMDGPU::REG_SEQUENCE) { 330 if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TEX_INST) { 331 unsigned Reg = MI->getOperand(1).getReg(); 332 for (MachineRegisterInfo::def_instr_iterator 333 It = MRI->def_instr_begin(Reg), E = MRI->def_instr_end(); 334 It != E; ++It) { 335 RemoveMI(&(*It)); 336 } 337 } 338 continue; 339 } 340 341 342 RegSeqInfo RSI(*MRI, MI); 343 344 // All uses of MI are swizzeable ? 
345 unsigned Reg = MI->getOperand(0).getReg(); 346 if (!areAllUsesSwizzeable(Reg)) 347 continue; 348 349 DEBUG (dbgs() << "Trying to optimize "; 350 MI->dump(); 351 ); 352 353 RegSeqInfo CandidateRSI; 354 std::vector<std::pair<unsigned, unsigned> > RemapChan; 355 DEBUG(dbgs() << "Using common slots...\n";); 356 if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) { 357 // Remove CandidateRSI mapping 358 RemoveMI(CandidateRSI.Instr); 359 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 360 trackRSI(RSI); 361 continue; 362 } 363 DEBUG(dbgs() << "Using free slots...\n";); 364 RemapChan.clear(); 365 if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) { 366 RemoveMI(CandidateRSI.Instr); 367 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 368 trackRSI(RSI); 369 continue; 370 } 371 //Failed to merge 372 trackRSI(RSI); 373 } 374 } 375 return false; 376 } 377 378 } 379 380 llvm::FunctionPass *llvm::createR600VectorRegMerger(TargetMachine &tm) { 381 return new R600VectorRegMerger(tm); 382 } 383