1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a pass that performs load / store related peephole 11 // optimizations. This pass should be run after register allocation. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #define DEBUG_TYPE "arm-ldst-opt" 16 #include "ARM.h" 17 #include "ARMBaseInstrInfo.h" 18 #include "ARMBaseRegisterInfo.h" 19 #include "ARMMachineFunctionInfo.h" 20 #include "MCTargetDesc/ARMAddressingModes.h" 21 #include "llvm/DerivedTypes.h" 22 #include "llvm/Function.h" 23 #include "llvm/CodeGen/MachineBasicBlock.h" 24 #include "llvm/CodeGen/MachineFunctionPass.h" 25 #include "llvm/CodeGen/MachineInstr.h" 26 #include "llvm/CodeGen/MachineInstrBuilder.h" 27 #include "llvm/CodeGen/MachineRegisterInfo.h" 28 #include "llvm/CodeGen/RegisterScavenging.h" 29 #include "llvm/CodeGen/SelectionDAGNodes.h" 30 #include "llvm/Target/TargetData.h" 31 #include "llvm/Target/TargetInstrInfo.h" 32 #include "llvm/Target/TargetMachine.h" 33 #include "llvm/Target/TargetRegisterInfo.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/Debug.h" 36 #include "llvm/Support/raw_ostream.h" 37 #include "llvm/ADT/DenseMap.h" 38 #include "llvm/ADT/STLExtras.h" 39 #include "llvm/ADT/SmallPtrSet.h" 40 #include "llvm/ADT/SmallSet.h" 41 #include "llvm/ADT/SmallVector.h" 42 #include "llvm/ADT/Statistic.h" 43 using namespace llvm; 44 45 STATISTIC(NumLDMGened , "Number of ldm instructions generated"); 46 STATISTIC(NumSTMGened , "Number of stm instructions generated"); 47 STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); 48 STATISTIC(NumVSTMGened, "Number of vstm instructions generated"); 49 
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Cached target hooks and per-function state, initialized in
    // runOnMachineFunction (not visible in this chunk).
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    /// MemOpQueueEntry - One candidate load/store being considered for
    /// merging: its immediate offset, transferred register, kill state,
    /// ordinal position within the basic block, and the instruction itself.
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;  // Set once this op has been folded into an LDM/STM.
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

/// getLoadStoreMultipleOpcode - Map a single load/store opcode plus an
/// addressing submode to the corresponding load/store-multiple opcode,
/// bumping the matching statistic. Returns 0 when no non-writeback
/// multiple exists for that submode (the VFP db cases).
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    // Thumb2 LDM supports only ia and db submodes.
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

namespace llvm {
namespace ARM_AM {

/// getLoadStoreMultipleSubMode - Recover the addressing submode
/// (ia/ib/da/db) encoded in a load/store-multiple opcode.
AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

} // end namespace ARM_AM
} // end namespace llvm

/// isT2i32Load - True for the Thumb2 32-bit integer load opcodes handled
/// by this pass.
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

/// isi32Load - True for any (ARM or Thumb2) 32-bit integer load handled here.
static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

/// isT2i32Store - True for the Thumb2 32-bit integer store opcodes handled
/// by this pass.
static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

/// isi32Store - True for any (ARM or Thumb2) 32-bit integer store handled
/// here.
static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  // Pick the addressing submode from the starting offset relative to Base.
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination register to
      // use as the new base. (Safe: the highest-numbered destination is
      // written last by the LDM.)
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register to use as a new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = - Offset;
    }
    // The add/sub immediate must be encodable as a (T2)SO immediate.
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right at its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);

  return true;
}

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction. Scan all queued memops OUTSIDE [memOpsBegin, memOpsEnd)
  // (the assignment to i skips the merged range) that sit before the
  // insertion point and still kill their register.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
        continue;
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);
    }
  }

  // Try to do the merge. Insert just after the memop chosen as insertAfter.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  // UINT_MAX for undef regs so no subsequent register can "ascend" past it.
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limit are 32 for S variants, 16 for D variants.

  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      // Then recurse to merge the remaining tail starting at i.
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}

/// definesCPSR - True if MI has a live (non-dead) def of CPSR.
static bool definesCPSR(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}

/// isMatchingDecrement - True if MI is a sub of Bytes from Base into Base
/// (same predicate), i.e. a candidate to fold into a writeback/DB form.
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  // A live CPSR def (flag-setting sub) must not be folded away.
  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

/// isMatchingIncrement - True if MI is an add of Bytes from Base into Base
/// (same predicate), i.e. a candidate to fold into a writeback/IA form.
static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tADDspi:
    break;
  }

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  // A live CPSR def (flag-setting add) must not be folded away.
  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

/// getLSMultipleTransferSize - Number of bytes moved by MI: 4/8 for single
/// loads/stores, register-count times 4/8 for the multiple forms. 0 means
/// "not a transfer this pass understands".
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    // Variadic operands beyond the fixed ones are the transferred registers.
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

/// getUpdatingLSMultipleOpcode - Map a non-writeback load/store-multiple
/// opcode plus submode to its base-updating (_UPD) counterpart.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction (a matching decrement of the
  // base flips the submode: ia->db, ib->da).
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction (post-update: submode unchanged).
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      // If we are erasing the caller's scan iterator, advance it first.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

/// getPreIndexedLoadStoreOpcode - Map a single load/store opcode to its
/// pre-indexed form (VFP uses single-register _UPD multiples instead).
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// getPostIndexedLoadStoreOpcode - Map a single load/store opcode to its
/// post-indexed form (VFP uses single-register _UPD multiples instead).
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only zero-offset accesses can become pre/post-indexed forms.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      // If we are erasing the caller's scan iterator, advance it first.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD}_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
985 static bool isMemoryOp(const MachineInstr *MI) { 986 // When no memory operands are present, conservatively assume unaligned, 987 // volatile, unfoldable. 988 if (!MI->hasOneMemOperand()) 989 return false; 990 991 const MachineMemOperand *MMO = *MI->memoperands_begin(); 992 993 // Don't touch volatile memory accesses - we may be changing their order. 994 if (MMO->isVolatile()) 995 return false; 996 997 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is 998 // not. 999 if (MMO->getAlignment() < 4) 1000 return false; 1001 1002 // str <undef> could probably be eliminated entirely, but for now we just want 1003 // to avoid making a mess of it. 1004 // FIXME: Use str <undef> as a wildcard to enable better stm folding. 1005 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() && 1006 MI->getOperand(0).isUndef()) 1007 return false; 1008 1009 // Likewise don't mess with references to undefined addresses. 1010 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() && 1011 MI->getOperand(1).isUndef()) 1012 return false; 1013 1014 int Opcode = MI->getOpcode(); 1015 switch (Opcode) { 1016 default: break; 1017 case ARM::VLDRS: 1018 case ARM::VSTRS: 1019 return MI->getOperand(1).isReg(); 1020 case ARM::VLDRD: 1021 case ARM::VSTRD: 1022 return MI->getOperand(1).isReg(); 1023 case ARM::LDRi12: 1024 case ARM::STRi12: 1025 case ARM::t2LDRi8: 1026 case ARM::t2LDRi12: 1027 case ARM::t2STRi8: 1028 case ARM::t2STRi12: 1029 return MI->getOperand(1).isReg(); 1030 } 1031 return false; 1032 } 1033 1034 /// AdvanceRS - Advance register scavenger to just before the earliest memory 1035 /// op that is being merged. 
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  // Find the queue entry that appears earliest in the block, i.e. the one
  // with the smallest Position value.
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  // If the earliest op is the first instruction of the block there is nothing
  // to advance past; the scavenger already sits at the top of the block.
  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

/// getMemoryOpOffset - Decode the signed byte offset carried in the
/// addressing-mode immediate of a load / store handled by this pass.  The
/// immediate is taken from the third-to-last operand of the instruction.
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  // These opcodes carry a plain byte offset that needs no decoding.
  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
    return OffField;

  // Everything else packs the offset into an addrmode3 (LDRD/STRD) or
  // addrmode5 (VLDR/VSTR) field.  AM5 counts in words, so scale to bytes.
  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

/// InsertLDR_STR - Emit a single immediate-offset load (isDef) or store with
/// the given value / base registers and kill / undef flags.  Used when an
/// LDRD / STRD has to be split into two word-sized memory ops.
/// NOTE(review): the 'dl' parameter is accepted but the debug location is
/// actually taken from MBBI; confirm whether 'dl' is intentionally unused.
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    // Load form: the value register is a def (possibly dead).
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    // Store form: the value register is a use (possibly killed / undef).
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

/// FixInvalidRegPairOp - An LDRD / STRD needs an even/odd register pair (and
/// must avoid the Cortex-M3 LDRD erratum).  If the pair at MBBI violates
/// these constraints, rewrite it as an LDM/STM or as two single-word ops and
/// erase the original.  Returns true (with MBBI updated to the first
/// replacement) if a rewrite happened.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    // A proper even/odd pair (and no erratum exposure) needs no fixing.
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    // For loads the destination flag of interest is 'dead'; for stores it is
    // 'kill' on the source operands.
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    // t2 variants have no separate offset-register operand.
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        // Load the high (odd) half first so the base survives for the
        // second, base-clobbering load.
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        if (isT2 && NewOpc == ARM::t2LDRi8 && OffImm+4 >= 0)
          NewOpc = ARM::t2LDRi12;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        // <rdar://problem/11101911>
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    // Remove the original pair op and leave MBBI on the first replacement.
    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  // Curr* describe the chain of mergeable memory ops collected so far:
  // same base register, same opcode, same predicate.
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    // Rewrite any invalid LDRD/STRD first; on success MBBI already points at
    // the replacement, so re-examine it.
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            // Common case: offsets arrive in increasing order.
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            // Otherwise insert in sorted position, keeping the queue ordered
            // by offset; equal offsets cannot be merged.
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      // Debug values neither extend nor break a chain; just step over them.
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      // Reset chain state for the next run of mergeable ops.
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
1387 /// ldmfd sp!, {..., lr} 1388 /// bx lr 1389 /// or 1390 /// ldmfd sp!, {..., lr} 1391 /// mov pc, lr 1392 /// => 1393 /// ldmfd sp!, {..., pc} 1394 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { 1395 if (MBB.empty()) return false; 1396 1397 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 1398 if (MBBI != MBB.begin() && 1399 (MBBI->getOpcode() == ARM::BX_RET || 1400 MBBI->getOpcode() == ARM::tBX_RET || 1401 MBBI->getOpcode() == ARM::MOVPCLR)) { 1402 MachineInstr *PrevMI = prior(MBBI); 1403 unsigned Opcode = PrevMI->getOpcode(); 1404 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD || 1405 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD || 1406 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) { 1407 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1); 1408 if (MO.getReg() != ARM::LR) 1409 return false; 1410 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET); 1411 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) || 1412 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); 1413 PrevMI->setDesc(TII->get(NewOpc)); 1414 MO.setReg(ARM::PC); 1415 PrevMI->copyImplicitOps(&*MBBI); 1416 MBB.erase(MBBI); 1417 return true; 1418 } 1419 } 1420 return false; 1421 } 1422 1423 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1424 const TargetMachine &TM = Fn.getTarget(); 1425 AFI = Fn.getInfo<ARMFunctionInfo>(); 1426 TII = TM.getInstrInfo(); 1427 TRI = TM.getRegisterInfo(); 1428 STI = &TM.getSubtarget<ARMSubtarget>(); 1429 RS = new RegScavenger(); 1430 isThumb2 = AFI->isThumb2Function(); 1431 1432 bool Modified = false; 1433 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1434 ++MFI) { 1435 MachineBasicBlock &MBB = *MFI; 1436 Modified |= LoadStoreMultipleOpti(MBB); 1437 if (TM.getSubtarget<ARMSubtarget>().hasV5TOps()) 1438 Modified |= MergeReturnIntoLDM(MBB); 1439 } 1440 1441 delete RS; 1442 return Modified; 1443 } 1444 1445 1446 /// 
/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that moves
/// load / stores from consecutive locations close to make it more
/// likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Target hooks and per-function state, set up in runOnMachineFunction.
    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    // CanFormLdStDWord - Decide whether Op0/Op1 can be fused into one
    // LDRD / STRD and, on success, fill in the operands of the new op.
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    // RescheduleOps - Move same-base loads / stores next to each other.
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  // Each basic block is rescheduled independently.
  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

/// IsSafeAndProfitableToMove - Check that no instruction strictly between I
/// and E (other than members of MemOps) prevents moving the memory ops next
/// to each other, and estimate whether the register-pressure cost of doing
/// so is acceptable.
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4>
                                      &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      // A redefinition of the base register anywhere in the range kills the
      // transformation.
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  // Allocate one array big enough for both operand lists and copy them in,
  // Op0's memory references first.
  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // A known IR value and a non-volatile access are also required.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    // t2LDRDi8 / t2STRDi8 take a signed, 4-byte-scaled 8-bit immediate.
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    // ARM-mode LDRD / STRD pack magnitude plus add/sub into addrmode3.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

namespace {
  // Orders memory ops by decreasing offset; equal offsets are asserted
  // impossible (duplicates are filtered out before sorting).
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Walk Ops back-to-front, i.e. in increasing offset order, counting how
    // many contiguous same-opcode ops can move together.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      // Offsets must form a gap-free run of equal-sized accesses.
      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        // Ops is sorted by decreasing offset, so back() is the lowest offset.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
        } else {
          // No pair formed; just splice the ops next to the insert point.
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  // MI2LocMap numbers the instructions so distances can be compared later;
  // Base2Lds/StsMap bucket loads and stores by their base register.
  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      // Only unpredicated memory ops handled by this pass are candidates.
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          // A second access to the same base+offset ends this region.
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    // More instructions remain past the barrier: reset per-region state
    // before scanning the next region.
    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}