1 //===--------------------- R600MergeVectorRegisters.cpp -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// This pass merges inputs of swizzeable instructions into vector sharing 12 /// common data and/or have enough undef subreg using swizzle abilities. 13 /// 14 /// For instance let's consider the following pseudo code : 15 /// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3 16 /// ... 17 /// vreg7<def> = REG_SEQ vreg1, sub0, vreg3, sub1, undef, sub2, vreg4, sub3 18 /// (swizzable Inst) vreg7, SwizzleMask : sub0, sub1, sub2, sub3 19 /// 20 /// is turned into : 21 /// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3 22 /// ... 23 /// vreg7<def> = INSERT_SUBREG vreg4, sub3 24 /// (swizzable Inst) vreg7, SwizzleMask : sub0, sub2, sub1, sub3 25 /// 26 /// This allow regalloc to reduce register pressure for vector registers and 27 /// to reduce MOV count. 28 //===----------------------------------------------------------------------===// 29 30 #define DEBUG_TYPE "vec-merger" 31 #include "llvm/Support/Debug.h" 32 #include "AMDGPU.h" 33 #include "R600InstrInfo.h" 34 #include "llvm/CodeGen/DFAPacketizer.h" 35 #include "llvm/CodeGen/MachineDominators.h" 36 #include "llvm/CodeGen/MachineFunctionPass.h" 37 #include "llvm/CodeGen/MachineLoopInfo.h" 38 #include "llvm/CodeGen/Passes.h" 39 #include "llvm/CodeGen/MachineInstrBuilder.h" 40 #include "llvm/Support/raw_ostream.h" 41 #include "llvm/CodeGen/MachineRegisterInfo.h" 42 43 using namespace llvm; 44 45 namespace { 46 47 static bool 48 isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) { 49 for (MachineRegisterInfo::def_iterator It = MRI.def_begin(Reg), 50 E = MRI.def_end(); It != E; ++It) { 51 return (*It).isImplicitDef(); 52 } 53 llvm_unreachable("Reg without a def"); 54 return false; 55 } 56 57 class RegSeqInfo { 58 public: 59 MachineInstr *Instr; 60 DenseMap<unsigned, unsigned> RegToChan; 61 std::vector<unsigned> UndefReg; 62 RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) { 63 assert (MI->getOpcode() == AMDGPU::REG_SEQUENCE); 64 for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) { 65 MachineOperand &MO = Instr->getOperand(i); 66 unsigned Chan = Instr->getOperand(i + 1).getImm(); 67 if (isImplicitlyDef(MRI, MO.getReg())) 68 UndefReg.push_back(Chan); 69 else 70 RegToChan[MO.getReg()] = Chan; 71 } 72 } 73 RegSeqInfo() {} 74 75 bool operator==(const RegSeqInfo &RSI) const { 76 return RSI.Instr == Instr; 77 } 78 }; 79 80 class R600VectorRegMerger : public MachineFunctionPass { 81 private: 82 MachineRegisterInfo *MRI; 83 const R600InstrInfo *TII; 84 bool canSwizzle(const MachineInstr &) const; 85 bool areAllUsesSwizzeable(unsigned Reg) const; 86 void SwizzleInput(MachineInstr &, 87 const std::vector<std::pair<unsigned, unsigned> > &) const; 88 bool tryMergeVector(const RegSeqInfo *, RegSeqInfo *, 89 std::vector<std::pair<unsigned, unsigned> > &Remap) const; 90 bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, 91 std::vector<std::pair<unsigned, unsigned> > &RemapChan); 92 bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, 93 std::vector<std::pair<unsigned, unsigned> > &RemapChan); 94 MachineInstr *RebuildVector(RegSeqInfo *MI, 95 const RegSeqInfo *BaseVec, 96 const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const; 97 void RemoveMI(MachineInstr *); 98 void trackRSI(const RegSeqInfo &RSI); 99 100 typedef DenseMap<unsigned, std::vector<MachineInstr *> > InstructionSetMap; 101 DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq; 102 InstructionSetMap PreviousRegSeqByReg; 103 InstructionSetMap PreviousRegSeqByUndefCount; 104 public: 105 static char ID; 106 R600VectorRegMerger(TargetMachine &tm) : MachineFunctionPass(ID), 107 TII(0) { } 108 109 void getAnalysisUsage(AnalysisUsage &AU) const { 110 AU.setPreservesCFG(); 111 AU.addRequired<MachineDominatorTree>(); 112 AU.addPreserved<MachineDominatorTree>(); 113 AU.addRequired<MachineLoopInfo>(); 114 AU.addPreserved<MachineLoopInfo>(); 115 MachineFunctionPass::getAnalysisUsage(AU); 116 } 117 118 const char *getPassName() const { 119 return "R600 Vector Registers Merge Pass"; 120 } 121 122 bool runOnMachineFunction(MachineFunction &Fn); 123 }; 124 125 char R600VectorRegMerger::ID = 0; 126 127 bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI) 128 const { 129 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 130 return true; 131 switch (MI.getOpcode()) { 132 case AMDGPU::R600_ExportSwz: 133 case AMDGPU::EG_ExportSwz: 134 return true; 135 default: 136 return false; 137 } 138 } 139 140 bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched, 141 RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned> > &Remap) 142 const { 143 unsigned CurrentUndexIdx = 0; 144 for (DenseMap<unsigned, unsigned>::iterator It = ToMerge->RegToChan.begin(), 145 E = ToMerge->RegToChan.end(); It != E; ++It) { 146 DenseMap<unsigned, unsigned>::const_iterator PosInUntouched = 147 Untouched->RegToChan.find((*It).first); 148 if (PosInUntouched != Untouched->RegToChan.end()) { 149 Remap.push_back(std::pair<unsigned, unsigned> 150 ((*It).second, (*PosInUntouched).second)); 151 continue; 152 } 153 if (CurrentUndexIdx >= Untouched->UndefReg.size()) 154 return false; 155 Remap.push_back(std::pair<unsigned, unsigned> 156 ((*It).second, Untouched->UndefReg[CurrentUndexIdx++])); 157 } 158 159 return true; 160 } 161 162 static 163 unsigned getReassignedChan( 164 const std::vector<std::pair<unsigned, unsigned> > &RemapChan, 165 unsigned Chan) { 166 for (unsigned j = 0, je = RemapChan.size(); j < je; j++) { 167 if (RemapChan[j].first == Chan) 168 return RemapChan[j].second; 169 } 170 llvm_unreachable("Chan wasn't reassigned"); 171 } 172 173 MachineInstr *R600VectorRegMerger::RebuildVector( 174 RegSeqInfo *RSI, const RegSeqInfo *BaseRSI, 175 const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const { 176 unsigned Reg = RSI->Instr->getOperand(0).getReg(); 177 MachineBasicBlock::iterator Pos = RSI->Instr; 178 MachineBasicBlock &MBB = *Pos->getParent(); 179 DebugLoc DL = Pos->getDebugLoc(); 180 181 unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg(); 182 DenseMap<unsigned, unsigned> UpdatedRegToChan = BaseRSI->RegToChan; 183 std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg; 184 for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(), 185 E = RSI->RegToChan.end(); It != E; ++It) { 186 unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass); 187 unsigned SubReg = (*It).first; 188 unsigned Swizzle = (*It).second; 189 unsigned Chan = getReassignedChan(RemapChan, Swizzle); 190 191 MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::INSERT_SUBREG), 192 DstReg) 193 .addReg(SrcVec) 194 .addReg(SubReg) 195 .addImm(Chan); 196 UpdatedRegToChan[SubReg] = Chan; 197 std::vector<unsigned>::iterator ChanPos = 198 std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan); 199 if (ChanPos != UpdatedUndef.end()) 200 UpdatedUndef.erase(ChanPos); 201 assert(std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan) == 202 UpdatedUndef.end() && 203 "UpdatedUndef shouldn't contain Chan more than once!"); 204 DEBUG(dbgs() << " ->"; Tmp->dump();); 205 (void)Tmp; 206 SrcVec = DstReg; 207 } 208 Pos = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg) 209 .addReg(SrcVec); 210 DEBUG(dbgs() << " ->"; Pos->dump();); 211 212 DEBUG(dbgs() << " Updating Swizzle:\n"); 213 for (MachineRegisterInfo::use_iterator It = MRI->use_begin(Reg), 214 E = MRI->use_end(); It != E; ++It) { 215 DEBUG(dbgs() << " ";(*It).dump(); dbgs() << " ->"); 216 SwizzleInput(*It, RemapChan); 217 DEBUG((*It).dump()); 218 } 219 RSI->Instr->eraseFromParent(); 220 221 // Update RSI 222 RSI->Instr = Pos; 223 RSI->RegToChan = UpdatedRegToChan; 224 RSI->UndefReg = UpdatedUndef; 225 226 return Pos; 227 } 228 229 void R600VectorRegMerger::RemoveMI(MachineInstr *MI) { 230 for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(), 231 E = PreviousRegSeqByReg.end(); It != E; ++It) { 232 std::vector<MachineInstr *> &MIs = (*It).second; 233 MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end()); 234 } 235 for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(), 236 E = PreviousRegSeqByUndefCount.end(); It != E; ++It) { 237 std::vector<MachineInstr *> &MIs = (*It).second; 238 MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end()); 239 } 240 } 241 242 void R600VectorRegMerger::SwizzleInput(MachineInstr &MI, 243 const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const { 244 unsigned Offset; 245 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 246 Offset = 2; 247 else 248 Offset = 3; 249 for (unsigned i = 0; i < 4; i++) { 250 unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1; 251 for (unsigned j = 0, e = RemapChan.size(); j < e; j++) { 252 if (RemapChan[j].first == Swizzle) { 253 MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1); 254 break; 255 } 256 } 257 } 258 } 259 260 bool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const { 261 for (MachineRegisterInfo::use_iterator It = MRI->use_begin(Reg), 262 E = MRI->use_end(); It != E; ++It) { 263 if (!canSwizzle(*It)) 264 return false; 265 } 266 return true; 267 } 268 269 bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI, 270 RegSeqInfo &CompatibleRSI, 271 std::vector<std::pair<unsigned, unsigned> > &RemapChan) { 272 for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(), 273 MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) { 274 if (!MOp->isReg()) 275 continue; 276 if (PreviousRegSeqByReg[MOp->getReg()].empty()) 277 continue; 278 std::vector<MachineInstr *> MIs = PreviousRegSeqByReg[MOp->getReg()]; 279 for (unsigned i = 0, e = MIs.size(); i < e; i++) { 280 CompatibleRSI = PreviousRegSeq[MIs[i]]; 281 if (RSI == CompatibleRSI) 282 continue; 283 if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan)) 284 return true; 285 } 286 } 287 return false; 288 } 289 290 bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI, 291 RegSeqInfo &CompatibleRSI, 292 std::vector<std::pair<unsigned, unsigned> > &RemapChan) { 293 unsigned NeededUndefs = 4 - RSI.UndefReg.size(); 294 if (PreviousRegSeqByUndefCount[NeededUndefs].empty()) 295 return false; 296 std::vector<MachineInstr *> &MIs = 297 PreviousRegSeqByUndefCount[NeededUndefs]; 298 CompatibleRSI = PreviousRegSeq[MIs.back()]; 299 tryMergeVector(&CompatibleRSI, &RSI, RemapChan); 300 return true; 301 } 302 303 void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) { 304 for (DenseMap<unsigned, unsigned>::const_iterator 305 It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) { 306 PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr); 307 } 308 PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr); 309 PreviousRegSeq[RSI.Instr] = RSI; 310 } 311 312 bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { 313 TII = static_cast<const R600InstrInfo *>(Fn.getTarget().getInstrInfo()); 314 MRI = &(Fn.getRegInfo()); 315 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 316 MBB != MBBe; ++MBB) { 317 MachineBasicBlock *MB = MBB; 318 PreviousRegSeq.clear(); 319 PreviousRegSeqByReg.clear(); 320 PreviousRegSeqByUndefCount.clear(); 321 322 for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end(); 323 MII != MIIE; ++MII) { 324 MachineInstr *MI = MII; 325 if (MI->getOpcode() != AMDGPU::REG_SEQUENCE) { 326 if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TEX_INST) { 327 unsigned Reg = MI->getOperand(1).getReg(); 328 for (MachineRegisterInfo::def_iterator It = MRI->def_begin(Reg), 329 E = MRI->def_end(); It != E; ++It) { 330 RemoveMI(&(*It)); 331 } 332 } 333 continue; 334 } 335 336 337 RegSeqInfo RSI(*MRI, MI); 338 339 // All uses of MI are swizzeable ? 340 unsigned Reg = MI->getOperand(0).getReg(); 341 if (!areAllUsesSwizzeable(Reg)) 342 continue; 343 344 DEBUG (dbgs() << "Trying to optimize "; 345 MI->dump(); 346 ); 347 348 RegSeqInfo CandidateRSI; 349 std::vector<std::pair<unsigned, unsigned> > RemapChan; 350 DEBUG(dbgs() << "Using common slots...\n";); 351 if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) { 352 // Remove CandidateRSI mapping 353 RemoveMI(CandidateRSI.Instr); 354 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 355 trackRSI(RSI); 356 continue; 357 } 358 DEBUG(dbgs() << "Using free slots...\n";); 359 RemapChan.clear(); 360 if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) { 361 RemoveMI(CandidateRSI.Instr); 362 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 363 trackRSI(RSI); 364 continue; 365 } 366 //Failed to merge 367 trackRSI(RSI); 368 } 369 } 370 return false; 371 } 372 373 } 374 375 llvm::FunctionPass *llvm::createR600VectorRegMerger(TargetMachine &tm) { 376 return new R600VectorRegMerger(tm); 377 } 378