1 //===- SSEDomainFix.cpp - Use proper int/float domain for SSE ---*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the SSEDomainFix pass. 11 // 12 // Some SSE instructions like mov, and, or, xor are available in different 13 // variants for different operand types. These variant instructions are 14 // equivalent, but on Nehalem and newer cpus there is extra latency 15 // transferring data between integer and floating point domains. 16 // 17 // This pass changes the variant instructions to minimize domain crossings. 18 // 19 //===----------------------------------------------------------------------===// 20 21 #define DEBUG_TYPE "sse-domain-fix" 22 #include "X86InstrInfo.h" 23 #include "llvm/CodeGen/MachineFunctionPass.h" 24 #include "llvm/CodeGen/MachineRegisterInfo.h" 25 #include "llvm/ADT/DepthFirstIterator.h" 26 #include "llvm/Support/Allocator.h" 27 #include "llvm/Support/Debug.h" 28 #include "llvm/Support/raw_ostream.h" 29 using namespace llvm; 30 31 /// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track 32 /// of execution domains. 33 /// 34 /// An open DomainValue represents a set of instructions that can still switch 35 /// execution domain. Multiple registers may refer to the same open 36 /// DomainValue - they will eventually be collapsed to the same execution 37 /// domain. 38 /// 39 /// A collapsed DomainValue represents a single register that has been forced 40 /// into one of more execution domains. There is a separate collapsed 41 /// DomainValue for each register, but it may contain multiple execution 42 /// domains. A register value is initially created in a single execution 43 /// domain, but if we were forced to pay the penalty of a domain crossing, we 44 /// keep track of the fact the the register is now available in multiple 45 /// domains. 46 namespace { 47 struct DomainValue { 48 // Basic reference counting. 49 unsigned Refs; 50 51 // Bitmask of available domains. For an open DomainValue, it is the still 52 // possible domains for collapsing. For a collapsed DomainValue it is the 53 // domains where the register is available for free. 54 unsigned AvailableDomains; 55 56 // Position of the last defining instruction. 57 unsigned Dist; 58 59 // Twiddleable instructions using or defining these registers. 60 SmallVector<MachineInstr*, 8> Instrs; 61 62 // A collapsed DomainValue has no instructions to twiddle - it simply keeps 63 // track of the domains where the registers are already available. 64 bool isCollapsed() const { return Instrs.empty(); } 65 66 // Is domain available? 67 bool hasDomain(unsigned domain) const { 68 return AvailableDomains & (1u << domain); 69 } 70 71 // Mark domain as available. 72 void addDomain(unsigned domain) { 73 AvailableDomains |= 1u << domain; 74 } 75 76 // Restrict to a single domain available. 77 void setSingleDomain(unsigned domain) { 78 AvailableDomains = 1u << domain; 79 } 80 81 // Return bitmask of domains that are available and in mask. 82 unsigned getCommonDomains(unsigned mask) const { 83 return AvailableDomains & mask; 84 } 85 86 // First domain available. 87 unsigned getFirstDomain() const { 88 return CountTrailingZeros_32(AvailableDomains); 89 } 90 91 DomainValue() { clear(); } 92 93 void clear() { 94 Refs = AvailableDomains = Dist = 0; 95 Instrs.clear(); 96 } 97 }; 98 } 99 100 static const unsigned NumRegs = 16; 101 102 namespace { 103 class SSEDomainFixPass : public MachineFunctionPass { 104 static char ID; 105 SpecificBumpPtrAllocator<DomainValue> Allocator; 106 SmallVector<DomainValue*,16> Avail; 107 108 MachineFunction *MF; 109 const X86InstrInfo *TII; 110 const TargetRegisterInfo *TRI; 111 MachineBasicBlock *MBB; 112 DomainValue **LiveRegs; 113 typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap; 114 LiveOutMap LiveOuts; 115 unsigned Distance; 116 117 public: 118 SSEDomainFixPass() : MachineFunctionPass(ID) {} 119 120 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 121 AU.setPreservesAll(); 122 MachineFunctionPass::getAnalysisUsage(AU); 123 } 124 125 virtual bool runOnMachineFunction(MachineFunction &MF); 126 127 virtual const char *getPassName() const { 128 return "SSE execution domain fixup"; 129 } 130 131 private: 132 // Register mapping. 133 int RegIndex(unsigned Reg); 134 135 // DomainValue allocation. 136 DomainValue *Alloc(int domain = -1); 137 void Recycle(DomainValue*); 138 139 // LiveRegs manipulations. 140 void SetLiveReg(int rx, DomainValue *DV); 141 void Kill(int rx); 142 void Force(int rx, unsigned domain); 143 void Collapse(DomainValue *dv, unsigned domain); 144 bool Merge(DomainValue *A, DomainValue *B); 145 146 void enterBasicBlock(); 147 void visitGenericInstr(MachineInstr*); 148 void visitSoftInstr(MachineInstr*, unsigned mask); 149 void visitHardInstr(MachineInstr*, unsigned domain); 150 }; 151 } 152 153 char SSEDomainFixPass::ID = 0; 154 155 /// Translate TRI register number to an index into our smaller tables of 156 /// interesting registers. Return -1 for boring registers. 157 int SSEDomainFixPass::RegIndex(unsigned reg) { 158 assert(X86::XMM15 == X86::XMM0+NumRegs-1 && "Unexpected sort"); 159 reg -= X86::XMM0; 160 return reg < NumRegs ? (int) reg : -1; 161 } 162 163 DomainValue *SSEDomainFixPass::Alloc(int domain) { 164 DomainValue *dv = Avail.empty() ? 165 new(Allocator.Allocate()) DomainValue : 166 Avail.pop_back_val(); 167 dv->Dist = Distance; 168 if (domain >= 0) 169 dv->addDomain(domain); 170 return dv; 171 } 172 173 void SSEDomainFixPass::Recycle(DomainValue *dv) { 174 assert(dv && "Cannot recycle NULL"); 175 dv->clear(); 176 Avail.push_back(dv); 177 } 178 179 /// Set LiveRegs[rx] = dv, updating reference counts. 180 void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) { 181 assert(unsigned(rx) < NumRegs && "Invalid index"); 182 if (!LiveRegs) { 183 LiveRegs = new DomainValue*[NumRegs]; 184 std::fill(LiveRegs, LiveRegs+NumRegs, (DomainValue*)0); 185 } 186 187 if (LiveRegs[rx] == dv) 188 return; 189 if (LiveRegs[rx]) { 190 assert(LiveRegs[rx]->Refs && "Bad refcount"); 191 if (--LiveRegs[rx]->Refs == 0) Recycle(LiveRegs[rx]); 192 } 193 LiveRegs[rx] = dv; 194 if (dv) ++dv->Refs; 195 } 196 197 // Kill register rx, recycle or collapse any DomainValue. 198 void SSEDomainFixPass::Kill(int rx) { 199 assert(unsigned(rx) < NumRegs && "Invalid index"); 200 if (!LiveRegs || !LiveRegs[rx]) return; 201 202 // Before killing the last reference to an open DomainValue, collapse it to 203 // the first available domain. 204 if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->isCollapsed()) 205 Collapse(LiveRegs[rx], LiveRegs[rx]->getFirstDomain()); 206 else 207 SetLiveReg(rx, 0); 208 } 209 210 /// Force register rx into domain. 211 void SSEDomainFixPass::Force(int rx, unsigned domain) { 212 assert(unsigned(rx) < NumRegs && "Invalid index"); 213 DomainValue *dv; 214 if (LiveRegs && (dv = LiveRegs[rx])) { 215 if (dv->isCollapsed()) 216 dv->addDomain(domain); 217 else if (dv->hasDomain(domain)) 218 Collapse(dv, domain); 219 else { 220 // This is an incompatible open DomainValue. Collapse it to whatever and force 221 // the new value into domain. This costs a domain crossing. 222 Collapse(dv, dv->getFirstDomain()); 223 assert(LiveRegs[rx] && "Not live after collapse?"); 224 LiveRegs[rx]->addDomain(domain); 225 } 226 } else { 227 // Set up basic collapsed DomainValue. 228 SetLiveReg(rx, Alloc(domain)); 229 } 230 } 231 232 /// Collapse open DomainValue into given domain. If there are multiple 233 /// registers using dv, they each get a unique collapsed DomainValue. 234 void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) { 235 assert(dv->hasDomain(domain) && "Cannot collapse"); 236 237 // Collapse all the instructions. 238 while (!dv->Instrs.empty()) 239 TII->SetSSEDomain(dv->Instrs.pop_back_val(), domain); 240 dv->setSingleDomain(domain); 241 242 // If there are multiple users, give them new, unique DomainValues. 243 if (LiveRegs && dv->Refs > 1) 244 for (unsigned rx = 0; rx != NumRegs; ++rx) 245 if (LiveRegs[rx] == dv) 246 SetLiveReg(rx, Alloc(domain)); 247 } 248 249 /// Merge - All instructions and registers in B are moved to A, and B is 250 /// released. 251 bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) { 252 assert(!A->isCollapsed() && "Cannot merge into collapsed"); 253 assert(!B->isCollapsed() && "Cannot merge from collapsed"); 254 if (A == B) 255 return true; 256 // Restrict to the domains that A and B have in common. 257 unsigned common = A->getCommonDomains(B->AvailableDomains); 258 if (!common) 259 return false; 260 A->AvailableDomains = common; 261 A->Dist = std::max(A->Dist, B->Dist); 262 A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); 263 for (unsigned rx = 0; rx != NumRegs; ++rx) 264 if (LiveRegs[rx] == B) 265 SetLiveReg(rx, A); 266 return true; 267 } 268 269 void SSEDomainFixPass::enterBasicBlock() { 270 // Try to coalesce live-out registers from predecessors. 271 for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), 272 e = MBB->livein_end(); i != e; ++i) { 273 int rx = RegIndex(*i); 274 if (rx < 0) continue; 275 for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), 276 pe = MBB->pred_end(); pi != pe; ++pi) { 277 LiveOutMap::const_iterator fi = LiveOuts.find(*pi); 278 if (fi == LiveOuts.end()) continue; 279 DomainValue *pdv = fi->second[rx]; 280 if (!pdv) continue; 281 if (!LiveRegs || !LiveRegs[rx]) { 282 SetLiveReg(rx, pdv); 283 continue; 284 } 285 286 // We have a live DomainValue from more than one predecessor. 287 if (LiveRegs[rx]->isCollapsed()) { 288 // We are already collapsed, but predecessor is not. Force him. 289 unsigned domain = LiveRegs[rx]->getFirstDomain(); 290 if (!pdv->isCollapsed() && pdv->hasDomain(domain)) 291 Collapse(pdv, domain); 292 continue; 293 } 294 295 // Currently open, merge in predecessor. 296 if (!pdv->isCollapsed()) 297 Merge(LiveRegs[rx], pdv); 298 else 299 Force(rx, pdv->getFirstDomain()); 300 } 301 } 302 } 303 304 // A hard instruction only works in one domain. All input registers will be 305 // forced into that domain. 306 void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) { 307 // Collapse all uses. 308 for (unsigned i = mi->getDesc().getNumDefs(), 309 e = mi->getDesc().getNumOperands(); i != e; ++i) { 310 MachineOperand &mo = mi->getOperand(i); 311 if (!mo.isReg()) continue; 312 int rx = RegIndex(mo.getReg()); 313 if (rx < 0) continue; 314 Force(rx, domain); 315 } 316 317 // Kill all defs and force them. 318 for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { 319 MachineOperand &mo = mi->getOperand(i); 320 if (!mo.isReg()) continue; 321 int rx = RegIndex(mo.getReg()); 322 if (rx < 0) continue; 323 Kill(rx); 324 Force(rx, domain); 325 } 326 } 327 328 // A soft instruction can be changed to work in other domains given by mask. 329 void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) { 330 // Bitmask of available domains for this instruction after taking collapsed 331 // operands into account. 332 unsigned available = mask; 333 334 // Scan the explicit use operands for incoming domains. 335 SmallVector<int, 4> used; 336 if (LiveRegs) 337 for (unsigned i = mi->getDesc().getNumDefs(), 338 e = mi->getDesc().getNumOperands(); i != e; ++i) { 339 MachineOperand &mo = mi->getOperand(i); 340 if (!mo.isReg()) continue; 341 int rx = RegIndex(mo.getReg()); 342 if (rx < 0) continue; 343 if (DomainValue *dv = LiveRegs[rx]) { 344 // Bitmask of domains that dv and available have in common. 345 unsigned common = dv->getCommonDomains(available); 346 // Is it possible to use this collapsed register for free? 347 if (dv->isCollapsed()) { 348 // Restrict available domains to the ones in common with the operand. 349 // If there are no common domains, we must pay the cross-domain 350 // penalty for this operand. 351 if (common) available = common; 352 } else if (common) 353 // Open DomainValue is compatible, save it for merging. 354 used.push_back(rx); 355 else 356 // Open DomainValue is not compatible with instruction. It is useless 357 // now. 358 Kill(rx); 359 } 360 } 361 362 // If the collapsed operands force a single domain, propagate the collapse. 363 if (isPowerOf2_32(available)) { 364 unsigned domain = CountTrailingZeros_32(available); 365 TII->SetSSEDomain(mi, domain); 366 visitHardInstr(mi, domain); 367 return; 368 } 369 370 // Kill off any remaining uses that don't match available, and build a list of 371 // incoming DomainValues that we want to merge. 372 SmallVector<DomainValue*,4> doms; 373 for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { 374 int rx = *i; 375 DomainValue *dv = LiveRegs[rx]; 376 // This useless DomainValue could have been missed above. 377 if (!dv->getCommonDomains(available)) { 378 Kill(*i); 379 continue; 380 } 381 // sorted, uniqued insert. 382 bool inserted = false; 383 for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end(); 384 i != e && !inserted; ++i) { 385 if (dv == *i) 386 inserted = true; 387 else if (dv->Dist < (*i)->Dist) { 388 inserted = true; 389 doms.insert(i, dv); 390 } 391 } 392 if (!inserted) 393 doms.push_back(dv); 394 } 395 396 // doms are now sorted in order of appearance. Try to merge them all, giving 397 // priority to the latest ones. 398 DomainValue *dv = 0; 399 while (!doms.empty()) { 400 if (!dv) { 401 dv = doms.pop_back_val(); 402 continue; 403 } 404 405 DomainValue *latest = doms.pop_back_val(); 406 if (Merge(dv, latest)) continue; 407 408 // If latest didn't merge, it is useless now. Kill all registers using it. 409 for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i) 410 if (LiveRegs[*i] == latest) 411 Kill(*i); 412 } 413 414 // dv is the DomainValue we are going to use for this instruction. 415 if (!dv) 416 dv = Alloc(); 417 dv->Dist = Distance; 418 dv->AvailableDomains = available; 419 dv->Instrs.push_back(mi); 420 421 // Finally set all defs and non-collapsed uses to dv. 422 for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) { 423 MachineOperand &mo = mi->getOperand(i); 424 if (!mo.isReg()) continue; 425 int rx = RegIndex(mo.getReg()); 426 if (rx < 0) continue; 427 if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) { 428 Kill(rx); 429 SetLiveReg(rx, dv); 430 } 431 } 432 } 433 434 void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) { 435 // Process explicit defs, kill any XMM registers redefined. 436 for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { 437 MachineOperand &mo = mi->getOperand(i); 438 if (!mo.isReg()) continue; 439 int rx = RegIndex(mo.getReg()); 440 if (rx < 0) continue; 441 Kill(rx); 442 } 443 } 444 445 bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) { 446 MF = &mf; 447 TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo()); 448 TRI = MF->getTarget().getRegisterInfo(); 449 MBB = 0; 450 LiveRegs = 0; 451 Distance = 0; 452 assert(NumRegs == X86::VR128RegClass.getNumRegs() && "Bad regclass"); 453 454 // If no XMM registers are used in the function, we can skip it completely. 455 bool anyregs = false; 456 for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(), 457 E = X86::VR128RegClass.end(); I != E; ++I) 458 if (MF->getRegInfo().isPhysRegUsed(*I)) { 459 anyregs = true; 460 break; 461 } 462 if (!anyregs) return false; 463 464 MachineBasicBlock *Entry = MF->begin(); 465 SmallPtrSet<MachineBasicBlock*, 16> Visited; 466 for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> > 467 DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited); 468 DFI != DFE; ++DFI) { 469 MBB = *DFI; 470 enterBasicBlock(); 471 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; 472 ++I) { 473 MachineInstr *mi = I; 474 if (mi->isDebugValue()) continue; 475 ++Distance; 476 std::pair<uint16_t, uint16_t> domp = TII->GetSSEDomain(mi); 477 if (domp.first) 478 if (domp.second) 479 visitSoftInstr(mi, domp.second); 480 else 481 visitHardInstr(mi, domp.first); 482 else if (LiveRegs) 483 visitGenericInstr(mi); 484 } 485 486 // Save live registers at end of MBB - used by enterBasicBlock(). 487 if (LiveRegs) 488 LiveOuts.insert(std::make_pair(MBB, LiveRegs)); 489 LiveRegs = 0; 490 } 491 492 // Clear the LiveOuts vectors. Should we also collapse any remaining 493 // DomainValues? 494 for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end(); 495 i != e; ++i) 496 delete[] i->second; 497 LiveOuts.clear(); 498 Avail.clear(); 499 Allocator.DestroyAll(); 500 501 return false; 502 } 503 504 FunctionPass *llvm::createSSEDomainFixPass() { 505 return new SSEDomainFixPass(); 506 } 507