1 //=== A15SDOptimizerPass.cpp - Optimize DPR and SPR register accesses on A15==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // The Cortex-A15 processor employs a tracking scheme in its register renaming 11 // in order to process each instruction's micro-ops speculatively and 12 // out-of-order with appropriate forwarding. The ARM architecture allows VFP 13 // instructions to read and write 32-bit S-registers. Each S-register 14 // corresponds to one half (upper or lower) of an overlaid 64-bit D-register. 15 // 16 // There are several instruction patterns which can be used to provide this 17 // capability which can provide higher performance than other, potentially more 18 // direct patterns, specifically around when one micro-op reads a D-register 19 // operand that has recently been written as one or more S-register results. 20 // 21 // This file defines a pre-regalloc pass which looks for SPR producers which 22 // are going to be used by DPR (or QPR) consumers and creates the more 23 // optimized access pattern. 
24 // 25 //===----------------------------------------------------------------------===// 26 27 #include "ARM.h" 28 #include "ARMBaseInstrInfo.h" 29 #include "ARMBaseRegisterInfo.h" 30 #include "ARMSubtarget.h" 31 #include "llvm/ADT/Statistic.h" 32 #include "llvm/CodeGen/MachineFunction.h" 33 #include "llvm/CodeGen/MachineFunctionPass.h" 34 #include "llvm/CodeGen/MachineInstr.h" 35 #include "llvm/CodeGen/MachineInstrBuilder.h" 36 #include "llvm/CodeGen/MachineRegisterInfo.h" 37 #include "llvm/Support/Debug.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include "llvm/Target/TargetRegisterInfo.h" 40 #include "llvm/Target/TargetSubtargetInfo.h" 41 #include <map> 42 #include <set> 43 44 using namespace llvm; 45 46 #define DEBUG_TYPE "a15-sd-optimizer" 47 48 namespace { 49 struct A15SDOptimizer : public MachineFunctionPass { 50 static char ID; 51 A15SDOptimizer() : MachineFunctionPass(ID) {} 52 53 bool runOnMachineFunction(MachineFunction &Fn) override; 54 55 const char *getPassName() const override { 56 return "ARM A15 S->D optimizer"; 57 } 58 59 private: 60 const ARMBaseInstrInfo *TII; 61 const TargetRegisterInfo *TRI; 62 MachineRegisterInfo *MRI; 63 64 bool runOnInstruction(MachineInstr *MI); 65 66 // 67 // Instruction builder helpers 68 // 69 unsigned createDupLane(MachineBasicBlock &MBB, 70 MachineBasicBlock::iterator InsertBefore, 71 DebugLoc DL, 72 unsigned Reg, unsigned Lane, 73 bool QPR=false); 74 75 unsigned createExtractSubreg(MachineBasicBlock &MBB, 76 MachineBasicBlock::iterator InsertBefore, 77 DebugLoc DL, 78 unsigned DReg, unsigned Lane, 79 const TargetRegisterClass *TRC); 80 81 unsigned createVExt(MachineBasicBlock &MBB, 82 MachineBasicBlock::iterator InsertBefore, 83 DebugLoc DL, 84 unsigned Ssub0, unsigned Ssub1); 85 86 unsigned createRegSequence(MachineBasicBlock &MBB, 87 MachineBasicBlock::iterator InsertBefore, 88 DebugLoc DL, 89 unsigned Reg1, unsigned Reg2); 90 91 unsigned createInsertSubreg(MachineBasicBlock &MBB, 92 MachineBasicBlock::iterator 
InsertBefore, 93 DebugLoc DL, unsigned DReg, unsigned Lane, 94 unsigned ToInsert); 95 96 unsigned createImplicitDef(MachineBasicBlock &MBB, 97 MachineBasicBlock::iterator InsertBefore, 98 DebugLoc DL); 99 100 // 101 // Various property checkers 102 // 103 bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC); 104 bool hasPartialWrite(MachineInstr *MI); 105 SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI); 106 unsigned getDPRLaneFromSPR(unsigned SReg); 107 108 // 109 // Methods used for getting the definitions of partial registers 110 // 111 112 MachineInstr *elideCopies(MachineInstr *MI); 113 void elideCopiesAndPHIs(MachineInstr *MI, 114 SmallVectorImpl<MachineInstr*> &Outs); 115 116 // 117 // Pattern optimization methods 118 // 119 unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg); 120 unsigned optimizeSDPattern(MachineInstr *MI); 121 unsigned getPrefSPRLane(unsigned SReg); 122 123 // 124 // Sanitizing method - used to make sure if don't leave dead code around. 125 // 126 void eraseInstrWithNoUses(MachineInstr *MI); 127 128 // 129 // A map used to track the changes done by this pass. 130 // 131 std::map<MachineInstr*, unsigned> Replacements; 132 std::set<MachineInstr *> DeadInstr; 133 }; 134 char A15SDOptimizer::ID = 0; 135 } // end anonymous namespace 136 137 // Returns true if this is a use of a SPR register. 
138 bool A15SDOptimizer::usesRegClass(MachineOperand &MO, 139 const TargetRegisterClass *TRC) { 140 if (!MO.isReg()) 141 return false; 142 unsigned Reg = MO.getReg(); 143 144 if (TargetRegisterInfo::isVirtualRegister(Reg)) 145 return MRI->getRegClass(Reg)->hasSuperClassEq(TRC); 146 else 147 return TRC->contains(Reg); 148 } 149 150 unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) { 151 unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, 152 &ARM::DPRRegClass); 153 if (DReg != ARM::NoRegister) return ARM::ssub_1; 154 return ARM::ssub_0; 155 } 156 157 // Get the subreg type that is most likely to be coalesced 158 // for an SPR register that will be used in VDUP32d pseudo. 159 unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) { 160 if (!TRI->isVirtualRegister(SReg)) 161 return getDPRLaneFromSPR(SReg); 162 163 MachineInstr *MI = MRI->getVRegDef(SReg); 164 if (!MI) return ARM::ssub_0; 165 MachineOperand *MO = MI->findRegisterDefOperand(SReg); 166 167 assert(MO->isReg() && "Non-register operand found!"); 168 if (!MO) return ARM::ssub_0; 169 170 if (MI->isCopy() && usesRegClass(MI->getOperand(1), 171 &ARM::SPRRegClass)) { 172 SReg = MI->getOperand(1).getReg(); 173 } 174 175 if (TargetRegisterInfo::isVirtualRegister(SReg)) { 176 if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1; 177 return ARM::ssub_0; 178 } 179 return getDPRLaneFromSPR(SReg); 180 } 181 182 // MI is known to be dead. Figure out what instructions 183 // are also made dead by this and mark them for removal. 184 void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) { 185 SmallVector<MachineInstr *, 8> Front; 186 DeadInstr.insert(MI); 187 188 DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n"); 189 Front.push_back(MI); 190 191 while (Front.size() != 0) { 192 MI = Front.back(); 193 Front.pop_back(); 194 195 // MI is already known to be dead. We need to see 196 // if other instructions can also be removed. 
197 for (unsigned int i = 0; i < MI->getNumOperands(); ++i) { 198 MachineOperand &MO = MI->getOperand(i); 199 if ((!MO.isReg()) || (!MO.isUse())) 200 continue; 201 unsigned Reg = MO.getReg(); 202 if (!TRI->isVirtualRegister(Reg)) 203 continue; 204 MachineOperand *Op = MI->findRegisterDefOperand(Reg); 205 206 if (!Op) 207 continue; 208 209 MachineInstr *Def = Op->getParent(); 210 211 // We don't need to do anything if we have already marked 212 // this instruction as being dead. 213 if (DeadInstr.find(Def) != DeadInstr.end()) 214 continue; 215 216 // Check if all the uses of this instruction are marked as 217 // dead. If so, we can also mark this instruction as being 218 // dead. 219 bool IsDead = true; 220 for (unsigned int j = 0; j < Def->getNumOperands(); ++j) { 221 MachineOperand &MODef = Def->getOperand(j); 222 if ((!MODef.isReg()) || (!MODef.isDef())) 223 continue; 224 unsigned DefReg = MODef.getReg(); 225 if (!TRI->isVirtualRegister(DefReg)) { 226 IsDead = false; 227 break; 228 } 229 for (MachineRegisterInfo::use_instr_iterator 230 II = MRI->use_instr_begin(Reg), EE = MRI->use_instr_end(); 231 II != EE; ++II) { 232 // We don't care about self references. 233 if (&*II == Def) 234 continue; 235 if (DeadInstr.find(&*II) == DeadInstr.end()) { 236 IsDead = false; 237 break; 238 } 239 } 240 } 241 242 if (!IsDead) continue; 243 244 DEBUG(dbgs() << "Deleting instruction " << *Def << "\n"); 245 DeadInstr.insert(Def); 246 } 247 } 248 } 249 250 // Creates the more optimized patterns and generally does all the code 251 // transformations in this pass. 
252 unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { 253 if (MI->isCopy()) { 254 return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg()); 255 } 256 257 if (MI->isInsertSubreg()) { 258 unsigned DPRReg = MI->getOperand(1).getReg(); 259 unsigned SPRReg = MI->getOperand(2).getReg(); 260 261 if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) { 262 MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg()); 263 MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg()); 264 265 if (DPRMI && SPRMI) { 266 // See if the first operand of this insert_subreg is IMPLICIT_DEF 267 MachineInstr *ECDef = elideCopies(DPRMI); 268 if (ECDef && ECDef->isImplicitDef()) { 269 // Another corner case - if we're inserting something that is purely 270 // a subreg copy of a DPR, just use that DPR. 271 272 MachineInstr *EC = elideCopies(SPRMI); 273 // Is it a subreg copy of ssub_0? 274 if (EC && EC->isCopy() && 275 EC->getOperand(1).getSubReg() == ARM::ssub_0) { 276 DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI); 277 278 // Find the thing we're subreg copying out of - is it of the same 279 // regclass as DPRMI? (i.e. a DPR or QPR). 280 unsigned FullReg = SPRMI->getOperand(1).getReg(); 281 const TargetRegisterClass *TRC = 282 MRI->getRegClass(MI->getOperand(1).getReg()); 283 if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) { 284 DEBUG(dbgs() << "Subreg copy is compatible - returning "); 285 DEBUG(dbgs() << PrintReg(FullReg) << "\n"); 286 eraseInstrWithNoUses(MI); 287 return FullReg; 288 } 289 } 290 291 return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg()); 292 } 293 } 294 } 295 return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); 296 } 297 298 if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), 299 &ARM::SPRRegClass)) { 300 // See if all bar one of the operands are IMPLICIT_DEF and insert the 301 // optimizer pattern accordingly. 
302 unsigned NumImplicit = 0, NumTotal = 0; 303 unsigned NonImplicitReg = ~0U; 304 305 for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) { 306 if (!MI->getOperand(I).isReg()) 307 continue; 308 ++NumTotal; 309 unsigned OpReg = MI->getOperand(I).getReg(); 310 311 if (!TRI->isVirtualRegister(OpReg)) 312 break; 313 314 MachineInstr *Def = MRI->getVRegDef(OpReg); 315 if (!Def) 316 break; 317 if (Def->isImplicitDef()) 318 ++NumImplicit; 319 else 320 NonImplicitReg = MI->getOperand(I).getReg(); 321 } 322 323 if (NumImplicit == NumTotal - 1) 324 return optimizeAllLanesPattern(MI, NonImplicitReg); 325 else 326 return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); 327 } 328 329 llvm_unreachable("Unhandled update pattern!"); 330 } 331 332 // Return true if this MachineInstr inserts a scalar (SPR) value into 333 // a D or Q register. 334 bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) { 335 // The only way we can do a partial register update is through a COPY, 336 // INSERT_SUBREG or REG_SEQUENCE. 337 if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass)) 338 return true; 339 340 if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2), 341 &ARM::SPRRegClass)) 342 return true; 343 344 if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass)) 345 return true; 346 347 return false; 348 } 349 350 // Looks through full copies to get the instruction that defines the input 351 // operand for MI. 352 MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) { 353 if (!MI->isFullCopy()) 354 return MI; 355 if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) 356 return nullptr; 357 MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg()); 358 if (!Def) 359 return nullptr; 360 return elideCopies(Def); 361 } 362 363 // Look through full copies and PHIs to get the set of non-copy MachineInstrs 364 // that can produce MI. 
365 void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI, 366 SmallVectorImpl<MachineInstr*> &Outs) { 367 // Looking through PHIs may create loops so we need to track what 368 // instructions we have visited before. 369 std::set<MachineInstr *> Reached; 370 SmallVector<MachineInstr *, 8> Front; 371 Front.push_back(MI); 372 while (Front.size() != 0) { 373 MI = Front.back(); 374 Front.pop_back(); 375 376 // If we have already explored this MachineInstr, ignore it. 377 if (Reached.find(MI) != Reached.end()) 378 continue; 379 Reached.insert(MI); 380 if (MI->isPHI()) { 381 for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) { 382 unsigned Reg = MI->getOperand(I).getReg(); 383 if (!TRI->isVirtualRegister(Reg)) { 384 continue; 385 } 386 MachineInstr *NewMI = MRI->getVRegDef(Reg); 387 if (!NewMI) 388 continue; 389 Front.push_back(NewMI); 390 } 391 } else if (MI->isFullCopy()) { 392 if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) 393 continue; 394 MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg()); 395 if (!NewMI) 396 continue; 397 Front.push_back(NewMI); 398 } else { 399 DEBUG(dbgs() << "Found partial copy" << *MI <<"\n"); 400 Outs.push_back(MI); 401 } 402 } 403 } 404 405 // Return the DPR virtual registers that are read by this machine instruction 406 // (if any). 
407 SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) { 408 if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() || 409 MI->isKill()) 410 return SmallVector<unsigned, 8>(); 411 412 SmallVector<unsigned, 8> Defs; 413 for (unsigned i = 0; i < MI->getNumOperands(); ++i) { 414 MachineOperand &MO = MI->getOperand(i); 415 416 if (!MO.isReg() || !MO.isUse()) 417 continue; 418 if (!usesRegClass(MO, &ARM::DPRRegClass) && 419 !usesRegClass(MO, &ARM::QPRRegClass) && 420 !usesRegClass(MO, &ARM::DPairRegClass)) // Treat DPair as QPR 421 continue; 422 423 Defs.push_back(MO.getReg()); 424 } 425 return Defs; 426 } 427 428 // Creates a DPR register from an SPR one by using a VDUP. 429 unsigned 430 A15SDOptimizer::createDupLane(MachineBasicBlock &MBB, 431 MachineBasicBlock::iterator InsertBefore, 432 DebugLoc DL, 433 unsigned Reg, unsigned Lane, bool QPR) { 434 unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : 435 &ARM::DPRRegClass); 436 AddDefaultPred(BuildMI(MBB, 437 InsertBefore, 438 DL, 439 TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), 440 Out) 441 .addReg(Reg) 442 .addImm(Lane)); 443 444 return Out; 445 } 446 447 // Creates a SPR register from a DPR by copying the value in lane 0. 448 unsigned 449 A15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB, 450 MachineBasicBlock::iterator InsertBefore, 451 DebugLoc DL, 452 unsigned DReg, unsigned Lane, 453 const TargetRegisterClass *TRC) { 454 unsigned Out = MRI->createVirtualRegister(TRC); 455 BuildMI(MBB, 456 InsertBefore, 457 DL, 458 TII->get(TargetOpcode::COPY), Out) 459 .addReg(DReg, 0, Lane); 460 461 return Out; 462 } 463 464 // Takes two SPR registers and creates a DPR by using a REG_SEQUENCE. 
465 unsigned 466 A15SDOptimizer::createRegSequence(MachineBasicBlock &MBB, 467 MachineBasicBlock::iterator InsertBefore, 468 DebugLoc DL, 469 unsigned Reg1, unsigned Reg2) { 470 unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass); 471 BuildMI(MBB, 472 InsertBefore, 473 DL, 474 TII->get(TargetOpcode::REG_SEQUENCE), Out) 475 .addReg(Reg1) 476 .addImm(ARM::dsub_0) 477 .addReg(Reg2) 478 .addImm(ARM::dsub_1); 479 return Out; 480 } 481 482 // Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1) 483 // and merges them into one DPR register. 484 unsigned 485 A15SDOptimizer::createVExt(MachineBasicBlock &MBB, 486 MachineBasicBlock::iterator InsertBefore, 487 DebugLoc DL, 488 unsigned Ssub0, unsigned Ssub1) { 489 unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); 490 AddDefaultPred(BuildMI(MBB, 491 InsertBefore, 492 DL, 493 TII->get(ARM::VEXTd32), Out) 494 .addReg(Ssub0) 495 .addReg(Ssub1) 496 .addImm(1)); 497 return Out; 498 } 499 500 unsigned 501 A15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB, 502 MachineBasicBlock::iterator InsertBefore, 503 DebugLoc DL, unsigned DReg, unsigned Lane, 504 unsigned ToInsert) { 505 unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass); 506 BuildMI(MBB, 507 InsertBefore, 508 DL, 509 TII->get(TargetOpcode::INSERT_SUBREG), Out) 510 .addReg(DReg) 511 .addReg(ToInsert) 512 .addImm(Lane); 513 514 return Out; 515 } 516 517 unsigned 518 A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB, 519 MachineBasicBlock::iterator InsertBefore, 520 DebugLoc DL) { 521 unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); 522 BuildMI(MBB, 523 InsertBefore, 524 DL, 525 TII->get(TargetOpcode::IMPLICIT_DEF), Out); 526 return Out; 527 } 528 529 // This function inserts instructions in order to optimize interactions between 530 // SPR registers and DPR/QPR registers. It does so by performing VDUPs on all 531 // lanes, and the using VEXT instructions to recompose the result. 
532 unsigned 533 A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) { 534 MachineBasicBlock::iterator InsertPt(MI); 535 DebugLoc DL = MI->getDebugLoc(); 536 MachineBasicBlock &MBB = *MI->getParent(); 537 InsertPt++; 538 unsigned Out; 539 540 // DPair has the same length as QPR and also has two DPRs as subreg. 541 // Treat DPair as QPR. 542 if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass) || 543 MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPairRegClass)) { 544 unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg, 545 ARM::dsub_0, &ARM::DPRRegClass); 546 unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg, 547 ARM::dsub_1, &ARM::DPRRegClass); 548 549 unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0); 550 unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1); 551 Out = createVExt(MBB, InsertPt, DL, Out1, Out2); 552 553 unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0); 554 unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1); 555 Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4); 556 557 Out = createRegSequence(MBB, InsertPt, DL, Out, Out2); 558 559 } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) { 560 unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0); 561 unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1); 562 Out = createVExt(MBB, InsertPt, DL, Out1, Out2); 563 564 } else { 565 assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) && 566 "Found unexpected regclass!"); 567 568 unsigned PrefLane = getPrefSPRLane(Reg); 569 unsigned Lane; 570 switch (PrefLane) { 571 case ARM::ssub_0: Lane = 0; break; 572 case ARM::ssub_1: Lane = 1; break; 573 default: llvm_unreachable("Unknown preferred lane!"); 574 } 575 576 // Treat DPair as QPR 577 bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass) || 578 usesRegClass(MI->getOperand(0), &ARM::DPairRegClass); 579 580 Out = createImplicitDef(MBB, InsertPt, DL); 581 Out = createInsertSubreg(MBB, 
InsertPt, DL, Out, PrefLane, Reg); 582 Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR); 583 eraseInstrWithNoUses(MI); 584 } 585 return Out; 586 } 587 588 bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { 589 // We look for instructions that write S registers that are then read as 590 // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and 591 // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or 592 // merge two SPR values to form a DPR register. In order avoid false 593 // positives we make sure that there is an SPR producer so we look past 594 // COPY and PHI nodes to find it. 595 // 596 // The best code pattern for when an SPR producer is going to be used by a 597 // DPR or QPR consumer depends on whether the other lanes of the 598 // corresponding DPR/QPR are currently defined. 599 // 600 // We can handle these efficiently, depending on the type of 601 // pseudo-instruction that is producing the pattern 602 // 603 // * COPY: * VDUP all lanes and merge the results together 604 // using VEXTs. 605 // 606 // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR 607 // lane, and the other lane(s) of the DPR/QPR register 608 // that we are inserting in are undefined, use the 609 // original DPR/QPR value. 610 // * Otherwise, fall back on the same stategy as COPY. 611 // 612 // * REG_SEQUENCE: * If all except one of the input operands are 613 // IMPLICIT_DEFs, insert the VDUP pattern for just the 614 // defined input operand 615 // * Otherwise, fall back on the same stategy as COPY. 616 // 617 618 // First, get all the reads of D-registers done by this instruction. 619 SmallVector<unsigned, 8> Defs = getReadDPRs(MI); 620 bool Modified = false; 621 622 for (SmallVectorImpl<unsigned>::iterator I = Defs.begin(), E = Defs.end(); 623 I != E; ++I) { 624 // Follow the def-use chain for this DPR through COPYs, and also through 625 // PHIs (which are essentially multi-way COPYs). 
It is because of PHIs that 626 // we can end up with multiple defs of this DPR. 627 628 SmallVector<MachineInstr *, 8> DefSrcs; 629 if (!TRI->isVirtualRegister(*I)) 630 continue; 631 MachineInstr *Def = MRI->getVRegDef(*I); 632 if (!Def) 633 continue; 634 635 elideCopiesAndPHIs(Def, DefSrcs); 636 637 for (SmallVectorImpl<MachineInstr *>::iterator II = DefSrcs.begin(), 638 EE = DefSrcs.end(); II != EE; ++II) { 639 MachineInstr *MI = *II; 640 641 // If we've already analyzed and replaced this operand, don't do 642 // anything. 643 if (Replacements.find(MI) != Replacements.end()) 644 continue; 645 646 // Now, work out if the instruction causes a SPR->DPR dependency. 647 if (!hasPartialWrite(MI)) 648 continue; 649 650 // Collect all the uses of this MI's DPR def for updating later. 651 SmallVector<MachineOperand*, 8> Uses; 652 unsigned DPRDefReg = MI->getOperand(0).getReg(); 653 for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg), 654 E = MRI->use_end(); I != E; ++I) 655 Uses.push_back(&*I); 656 657 // We can optimize this. 658 unsigned NewReg = optimizeSDPattern(MI); 659 660 if (NewReg != 0) { 661 Modified = true; 662 for (SmallVectorImpl<MachineOperand *>::const_iterator I = Uses.begin(), 663 E = Uses.end(); I != E; ++I) { 664 // Make sure to constrain the register class of the new register to 665 // match what we're replacing. Otherwise we can optimize a DPR_VFP2 666 // reference into a plain DPR, and that will end poorly. NewReg is 667 // always virtual here, so there will always be a matching subclass 668 // to find. 
669 MRI->constrainRegClass(NewReg, MRI->getRegClass((*I)->getReg())); 670 671 DEBUG(dbgs() << "Replacing operand " 672 << **I << " with " 673 << PrintReg(NewReg) << "\n"); 674 (*I)->substVirtReg(NewReg, 0, *TRI); 675 } 676 } 677 Replacements[MI] = NewReg; 678 } 679 } 680 return Modified; 681 } 682 683 bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { 684 const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>(); 685 // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be 686 // enabled when NEON is available. 687 if (!(STI.isCortexA15() && STI.hasNEON())) 688 return false; 689 TII = STI.getInstrInfo(); 690 TRI = STI.getRegisterInfo(); 691 MRI = &Fn.getRegInfo(); 692 bool Modified = false; 693 694 DEBUG(dbgs() << "Running on function " << Fn.getName()<< "\n"); 695 696 DeadInstr.clear(); 697 Replacements.clear(); 698 699 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 700 ++MFI) { 701 702 for (MachineBasicBlock::iterator MI = MFI->begin(), ME = MFI->end(); 703 MI != ME;) { 704 Modified |= runOnInstruction(MI++); 705 } 706 707 } 708 709 for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(), 710 E = DeadInstr.end(); 711 I != E; ++I) { 712 (*I)->eraseFromParent(); 713 } 714 715 return Modified; 716 } 717 718 FunctionPass *llvm::createA15SDOptimizerPass() { 719 return new A15SDOptimizer(); 720 } 721