//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Thumb1RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
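///
/// For example (illustrative only; the exact registers and offsets are
/// hypothetical):
///
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// =>
///   ldmia r4, {r0, r1, r2}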

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
                          const MemOpQueue &MemOps, unsigned DefReg,
                          unsigned RangeBegin, unsigned RangeEnd);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc dl, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

namespace llvm {
namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

} // end namespace ARM_AM
} // end namespace llvm

static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}

/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc dl, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");

  // Start updating any instructions with immediate offsets. Insert a sub
  // before the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    if (MBBI->readsRegister(Base)) {
      unsigned Opc = MBBI->getOpcode();
      int Offset;
      bool InsertSub = false;

      if (Opc == ARM::tLDRi || Opc == ARM::tSTRi ||
          Opc == ARM::tLDRHi || Opc == ARM::tSTRHi ||
          Opc == ARM::tLDRBi || Opc == ARM::tSTRBi) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);
        if (Offset >= 0)
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if (Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) {
        // SUB/ADD using this register. Merge it with the update.
        // If the merged offset is too large, insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (TL->isLegalAddImmediate(Offset)) {
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

      if (InsertSub) {
        // An instruction above couldn't be updated, so insert a sub.
        AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base))
          .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
          .addImm(Pred).addReg(PredReg);
        return;
      }
    }

    if (MBBI->killsRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // The end of the block was reached. This means register liveness escapes
  // the block, and it's necessary to insert a sub before the last
  // instruction.
  if (MBB.succ_size() > 0)
    // But only insert the SUB if there is actually a successor block.
    // FIXME: Check more carefully if register is live at this point, e.g. by
    // also examining the successor block's register liveness information.
    AddDefaultT1CC(BuildMI(MBB, --MBBI, dl, TII->get(ARM::tSUBi8), Base))
      .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
      .addImm(Pred).addReg(PredReg);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    } else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
      Offset = -Offset;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return false; // Probably not worth it then.

    if (isThumb1) {
      if (Base != NewBase) {
        // Need to insert a MOV to the new base first.
        // FIXME: If the immediate fits in 3 bits, use ADD instead.
        BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
          .addReg(Base, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg);
      }
      AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase))
        .addReg(NewBase, getKillRegState(true)).addImm(Offset)
        .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }

    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback. Check for this.
  if (Opcode == ARM::tLDMIA && isThumb1)
    for (unsigned I = 0; I < NumRegs; ++I)
      if (Base == Regs[I].first) {
        Writeback = false;
        break;
      }

  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback. Update
    // future uses of the base with the added offset (if possible), or reset
    // the base register as necessary.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);

  return true;
}

/// \brief Find all instructions using a given imp-def within a range.
///
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between
/// RangeBegin and RangeEnd must be modified to use an undefined value.
///
/// The live range continues until we find a second definition or one of the
/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
/// we must consider all uses and decide which are relevant in a second pass.
void ARMLoadStoreOpt::findUsesOfImpDef(
    SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
    unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
  std::map<unsigned, MachineOperand *> Uses;
  unsigned LastLivePos = RangeEnd;

  // First we find all uses of this register with Position between RangeBegin
  // and RangeEnd, any or all of these could be uses of a definition at
  // RangeBegin. We also record the latest position a definition at RangeBegin
  // would be considered live.
  for (unsigned i = 0; i < MemOps.size(); ++i) {
    MachineInstr &MI = *MemOps[i].MBBI;
    unsigned MIPosition = MemOps[i].Position;
    if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
      continue;

    // If this instruction defines the register, then any later use will be of
    // that definition rather than ours.
    if (MI.definesRegister(DefReg))
      LastLivePos = std::min(LastLivePos, MIPosition);

    MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
    if (!UseOp)
      continue;

    // If this instruction kills the register then (assuming liveness is
    // correct when we start) we don't need to think about anything after here.
    if (UseOp->isKill())
      LastLivePos = std::min(LastLivePos, MIPosition);

    Uses[MIPosition] = UseOp;
  }

  // Now we traverse the list of all uses, and append the ones that actually
  // use our definition to the requested list.
  for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
                                                      E = Uses.end();
       I != E; ++I) {
    // List is sorted by position so once we've found one out of range there
    // will be no more to consider.
    if (I->first > LastLivePos)
      break;
    UsesOfImpDefs.push_back(I->second);
  }
}

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  SmallVector<MachineOperand *, 8> UsesOfImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
        continue;
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);

      // There may be other uses of the definition between this instruction
      // and the eventual LDM/STM position. These should be marked undef if
      // the merge takes place.
      findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
                       insertPos);
    }
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(std::prev(Loc));

  // In gathering loads together, we may have moved the imp-def of a register
  // past one of its uses. This is OK, since we know better than the rest of
  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
  // affected uses.
  for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
                                                   E = UsesOfImpDefs.end();
       I != E; ++I)
    (*I)->setIsUndef();

  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or
/// more load / store multiple instructions.
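///
/// A sketch of the constraint (illustrative; the registers are hypothetical):
/// register encodings must be ascending at consecutive offsets, so
///
///   ldr r1, [r4]
///   ldr r2, [r4, #4]
///   ldr r5, [r4, #8]
/// =>
///   ldmia r4, {r1, r2, r5}
///
/// whereas a non-ascending register in the sequence ends the run and only the
/// earlier subset is merged.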
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limits are 32 for S variants, 16 for D variants.

  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with an odd register
        // number because Q register unaligned vldm/vstm need more uops.
        (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merging the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}

static bool definesCPSR(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}

static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tSUBi8:
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
                    MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tADDi8:
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tADDspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
                    MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       bool &Advance,
                                       MachineBasicBlock::iterator &I) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const TargetInstrInfo *TII,
                                     bool &Advance,
                                     MachineBasicBlock::iterator &I) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
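  // For example (illustrative; the registers are hypothetical), on
  // ARM/Thumb2:
  //   add r1, r1, #4
  //   ldr r0, [r1]
  // becomes the pre-indexed
  //   ldr r0, [r1, #4]!
  // and
  //   ldr r0, [r1]
  //   add r1, r1, #4
  // becomes the post-indexed
  //   ldr r0, [r1], #4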
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the
  // would-be writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,
                                   Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,
                                   Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(std::prev(Loc));
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4.
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = std::prev(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());

      // Watch out for:
      // r4 := ldr [r0, #8]
      // r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates the write-after-write (WAW) dependence. The same
      // applies to str. Try to merge inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else {
      TryMerge = true;
    }

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);

        // Find a scratch register.
        unsigned Scratch =
          RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);

        // Process the load / store instructions.
        RS->forward(std::prev(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,
                                         Advance, MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(std::prev(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(std::prev(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
/// ldmfd sp!, {..., lr}
/// bx lr
/// or
/// ldmfd sp!, {..., lr}
/// mov pc, lr
/// =>
/// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  TL = TM.getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  // FIXME: Temporarily disabling for Thumb-1 due to miscompiles.
  if (isThumb1) {
    delete RS;
    return false;
  }

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores of consecutive memory locations closer together to make
/// it more likely that they will be combined later.
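///
/// For example, given
///   ldr r0, [r2]
///   <unrelated instructions>
///   ldr r1, [r2, #4]
/// the pass tries to move the second load next to the first so that a later
/// pass (or the LDRD / STRD formation below) can combine them.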

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getDataLayout();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate the register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy the memory operands of Op0 and Op1 into a new array assigned to MI.
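/// Keeping both MachineMemOperands on the merged instruction preserves the
/// size, alignment, and volatility information of the original accesses.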
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement. At the moment, we ignore the memory operand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
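  // Descending order puts the lowest offsets at the back of Ops, where the
  // scan below (which walks Ops from the back) and pop_back() see them first.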
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form a register
        // pair.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
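          // e.g. "ldr rA, [rB]" + "ldr rC, [rB, #4]" becomes
          // "ldrd rA, rC, [rB]". The allocation hints added below encourage
          // the register allocator to put rA / rC in an even / odd pair,
          // which ARM-mode LDRD / STRD require.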
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
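        // Loads / stores must not be rescheduled across calls or
        // terminators; re-schedule what has been collected so far and
        // start a new collection after this point.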
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that was seen
        // earlier). Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - Returns an instance of the load /
/// store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
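
// A minimal sketch of how a target might schedule both passes (assuming the
// usual TargetPassConfig hooks; "MyARMPassConfig" is a hypothetical stand-in
// for the target's actual pass-config class):
//
//   bool MyARMPassConfig::addPreRegAlloc() {
//     addPass(createARMLoadStoreOptimizationPass(/*PreAlloc=*/true));
//     return true;
//   }
//   bool MyARMPassConfig::addPreSched2() {
//     addPass(createARMLoadStoreOptimizationPass());  // post-RA variant
//     return true;
//   }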