1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file This file contains a pass that performs load / store related peephole 11 /// optimizations. This pass should be run after register allocation. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ARM.h" 16 #include "ARMBaseInstrInfo.h" 17 #include "ARMBaseRegisterInfo.h" 18 #include "ARMISelLowering.h" 19 #include "ARMMachineFunctionInfo.h" 20 #include "ARMSubtarget.h" 21 #include "MCTargetDesc/ARMAddressingModes.h" 22 #include "ThumbRegisterInfo.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallPtrSet.h" 26 #include "llvm/ADT/SmallSet.h" 27 #include "llvm/ADT/SmallVector.h" 28 #include "llvm/ADT/Statistic.h" 29 #include "llvm/CodeGen/MachineBasicBlock.h" 30 #include "llvm/CodeGen/MachineFunctionPass.h" 31 #include "llvm/CodeGen/MachineInstr.h" 32 #include "llvm/CodeGen/MachineInstrBuilder.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/RegisterClassInfo.h" 35 #include "llvm/CodeGen/SelectionDAGNodes.h" 36 #include "llvm/CodeGen/LivePhysRegs.h" 37 #include "llvm/IR/DataLayout.h" 38 #include "llvm/IR/DerivedTypes.h" 39 #include "llvm/IR/Function.h" 40 #include "llvm/Support/Allocator.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Support/ErrorHandling.h" 43 #include "llvm/Support/raw_ostream.h" 44 #include "llvm/Target/TargetInstrInfo.h" 45 #include "llvm/Target/TargetMachine.h" 46 #include "llvm/Target/TargetRegisterInfo.h" 47 using namespace llvm; 48 49 #define DEBUG_TYPE "arm-ldst-opt" 50 51 STATISTIC(NumLDMGened , "Number of ldm instructions generated"); 52 
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

namespace llvm {
void initializeARMLoadStoreOptPass(PassRegistry &);
}

#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"

namespace {
  /// Post-register-allocation pass that combines load / store instructions to
  /// form ldm / stm instructions.
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {
      initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
    }

    // Cached per-function state, set up in runOnMachineFunction.
    const MachineFunction *MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    // Liveness tracking for findFreeReg; see moveLiveRegsBefore.
    LivePhysRegs LiveRegs;
    RegisterClassInfo RegClassInfo;
    // Iterator LiveRegs is currently positioned at (queries must move it
    // strictly backwards within a block).
    MachineBasicBlock::const_iterator LiveRegPos;
    bool LiveRegsValid;
    bool RegClassInfoValid;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return ARM_LOAD_STORE_OPT_NAME;
    }

  private:
    /// A set of load/store MachineInstrs with same base register sorted by
    /// offset.
    struct MemOpQueueEntry {
      MachineInstr *MI;
      int Offset;        ///< Load/Store offset.
      unsigned Position; ///< Position as counted from end of basic block.
      MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
        : MI(MI), Offset(Offset), Position(Position) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;

    /// A set of MachineInstrs that fulfill (nearly all) conditions to get
    /// merged into a LDM/STM.
    struct MergeCandidate {
      /// List of instructions ordered by load/store offset.
      SmallVector<MachineInstr*, 4> Instrs;
      /// Index in Instrs of the instruction being latest in the schedule.
      unsigned LatestMIIdx;
      /// Index in Instrs of the instruction being earliest in the schedule.
      unsigned EarliestMIIdx;
      /// Index into the basic block where the merged instruction will be
      /// inserted. (See MemOpQueueEntry.Position)
      unsigned InsertPos;
      /// Whether the instructions can be merged into a ldm/stm instruction.
      bool CanMergeToLSMulti;
      /// Whether the instructions can be merged into a ldrd/strd instruction.
      bool CanMergeToLSDouble;
    };
    // Candidates are bump-allocated; they live until the allocator is reset.
    SpecificBumpPtrAllocator<MergeCandidate> Allocator;
    SmallVector<const MergeCandidate*,4> Candidates;
    SmallVector<MachineInstr*,4> MergeBaseCandidates;

    void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                            MachineBasicBlock::const_iterator Before);
    unsigned findFreeReg(const TargetRegisterClass &RegClass);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc DL, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
    MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
    void FormCandidates(const MemOpQueue &MemOps);
    MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineInstr *MI);
    bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
    bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
    bool CombineMovBx(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false)

/// Return true if \p MI has a definition of CPSR that is not marked dead.
static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}

/// Decode the byte offset of a load/store from its immediate operand (the
/// third-to-last operand, i.e. the one just before the predicate operands).
/// Handles plain immediates, Thumb1 scaled immediates and the packed
/// addrmode3/addrmode5 encodings (which carry an add/sub flag).
static int getMemoryOpOffset(const MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  // These opcodes store the byte offset directly in the immediate.
  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  // Remaining opcodes use a packed addressing-mode immediate: addrmode3 for
  // LDRD/STRD, addrmode5 (offset in words) for the VFP loads/stores.
  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
    : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}

/// Return the base-register operand of a load/store handled by this pass
/// (always operand 1 for the single load/store forms considered here).
static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  return MI.getOperand(1);
}

/// Return the transferred-register operand (loaded destination or stored
/// source), which is operand 0 for the single load/store forms here.
static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  return MI.getOperand(0);
}

/// Map a single load/store opcode plus an addressing submode to the matching
/// load/store-multiple opcode. Bumps the LDM/STM statistics as a side effect.
/// Returns 0 where only a writeback ("_UPD") form of the instruction exists.
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

/// Return the addressing submode (ia/ib/da/db) encoded by a load/store
/// multiple opcode.
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

/// True for the Thumb1 32-bit load opcodes handled by this pass.
static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

/// True for the Thumb2 32-bit load opcodes handled by this pass.
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

/// True for any 32-bit integer load (ARM, Thumb1 or Thumb2).
static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
}

/// True for the Thumb1 32-bit store opcodes handled by this pass.
static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

/// True for the Thumb2 32-bit store opcodes handled by this pass.
static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

/// True for any 32-bit integer store (ARM, Thumb1 or Thumb2).
static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

/// True for any single (non-multiple) load: integer or VFP.
static bool isLoadSingle(unsigned Opc) {
  return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
}

/// Multiplier converting a word (4-byte) offset into the immediate units of
/// opcode \p Opc: word accesses encode their immediate in words (x1),
/// halfword accesses in halfwords (x2), byte accesses in bytes (x4).
static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}

/// Number of bytes transferred by \p MI: 4 or 8 for single loads/stores, and
/// 4 or 8 per register operand in the reglist of a load/store multiple.
/// Returns 0 for opcodes this pass does not recognize.
static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    // Operands beyond the fixed ones form the reglist; each moves 4 bytes.
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    // Same, but D registers move 8 bytes each.
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc DL, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4 ;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}

/// Return the first register of class \p RegClass that is not currently live
/// (according to LiveRegs), or 0 if every register in the allocation order is
/// live.
535 unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) { 536 if (!RegClassInfoValid) { 537 RegClassInfo.runOnMachineFunction(*MF); 538 RegClassInfoValid = true; 539 } 540 541 for (unsigned Reg : RegClassInfo.getOrder(&RegClass)) 542 if (!LiveRegs.contains(Reg)) 543 return Reg; 544 return 0; 545 } 546 547 /// Compute live registers just before instruction \p Before (in normal schedule 548 /// direction). Computes backwards so multiple queries in the same block must 549 /// come in reverse order. 550 void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB, 551 MachineBasicBlock::const_iterator Before) { 552 // Initialize if we never queried in this block. 553 if (!LiveRegsValid) { 554 LiveRegs.init(TRI); 555 LiveRegs.addLiveOuts(&MBB, true); 556 LiveRegPos = MBB.end(); 557 LiveRegsValid = true; 558 } 559 // Move backward just before the "Before" position. 560 while (LiveRegPos != Before) { 561 --LiveRegPos; 562 LiveRegs.stepBackward(*LiveRegPos); 563 } 564 } 565 566 static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs, 567 unsigned Reg) { 568 for (const std::pair<unsigned, bool> &R : Regs) 569 if (R.first == Reg) 570 return true; 571 return false; 572 } 573 574 /// Create and insert a LDM or STM with Base as base register and registers in 575 /// Regs as the register operands that would be loaded / stored. It returns 576 /// true if the transformation is done. 577 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB, 578 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 579 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 580 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) { 581 unsigned NumRegs = Regs.size(); 582 assert(NumRegs > 1); 583 584 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. 585 // Compute liveness information for that register to make the decision. 
586 bool SafeToClobberCPSR = !isThumb1 || 587 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) == 588 MachineBasicBlock::LQR_Dead); 589 590 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. 591 592 // Exception: If the base register is in the input reglist, Thumb1 LDM is 593 // non-writeback. 594 // It's also not possible to merge an STR of the base register in Thumb1. 595 if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) { 596 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); 597 if (Opcode == ARM::tLDRi) { 598 Writeback = false; 599 } else if (Opcode == ARM::tSTRi) { 600 return nullptr; 601 } 602 } 603 604 ARM_AM::AMSubMode Mode = ARM_AM::ia; 605 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. 606 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 607 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1; 608 609 if (Offset == 4 && haveIBAndDA) { 610 Mode = ARM_AM::ib; 611 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) { 612 Mode = ARM_AM::da; 613 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { 614 // VLDM/VSTM do not support DB mode without also updating the base reg. 615 Mode = ARM_AM::db; 616 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) { 617 // Check if this is a supported opcode before inserting instructions to 618 // calculate a new base register. 619 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr; 620 621 // If starting offset isn't zero, insert a MI to materialize a new base. 622 // But only do so if it is cost effective, i.e. merging more than two 623 // loads / stores. 624 if (NumRegs <= 2) 625 return nullptr; 626 627 // On Thumb1, it's not worth materializing a new base register without 628 // clobbering the CPSR (i.e. not using ADDS/SUBS). 
629 if (!SafeToClobberCPSR) 630 return nullptr; 631 632 unsigned NewBase; 633 if (isi32Load(Opcode)) { 634 // If it is a load, then just use one of the destination registers 635 // as the new base. Will no longer be writeback in Thumb1. 636 NewBase = Regs[NumRegs-1].first; 637 Writeback = false; 638 } else { 639 // Find a free register that we can use as scratch register. 640 moveLiveRegsBefore(MBB, InsertBefore); 641 // The merged instruction does not exist yet but will use several Regs if 642 // it is a Store. 643 if (!isLoadSingle(Opcode)) 644 for (const std::pair<unsigned, bool> &R : Regs) 645 LiveRegs.addReg(R.first); 646 647 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass); 648 if (NewBase == 0) 649 return nullptr; 650 } 651 652 int BaseOpc = 653 isThumb2 ? ARM::t2ADDri : 654 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi : 655 (isThumb1 && Offset < 8) ? ARM::tADDi3 : 656 isThumb1 ? ARM::tADDi8 : ARM::ADDri; 657 658 if (Offset < 0) { 659 Offset = - Offset; 660 BaseOpc = 661 isThumb2 ? ARM::t2SUBri : 662 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 : 663 isThumb1 ? ARM::tSUBi8 : ARM::SUBri; 664 } 665 666 if (!TL->isLegalAddImmediate(Offset)) 667 // FIXME: Try add with register operand? 668 return nullptr; // Probably not worth it then. 669 670 // We can only append a kill flag to the add/sub input if the value is not 671 // used in the register list of the stm as well. 672 bool KillOldBase = BaseKill && 673 (!isi32Store(Opcode) || !ContainsReg(Regs, Base)); 674 675 if (isThumb1) { 676 // Thumb1: depending on immediate size, use either 677 // ADDS NewBase, Base, #imm3 678 // or 679 // MOV NewBase, Base 680 // ADDS NewBase, #imm8. 681 if (Base != NewBase && 682 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) { 683 // Need to insert a MOV to the new base first. 
684 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) && 685 !STI->hasV6Ops()) { 686 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr 687 if (Pred != ARMCC::AL) 688 return nullptr; 689 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase) 690 .addReg(Base, getKillRegState(KillOldBase)); 691 } else 692 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase) 693 .addReg(Base, getKillRegState(KillOldBase)) 694 .addImm(Pred).addReg(PredReg); 695 696 // The following ADDS/SUBS becomes an update. 697 Base = NewBase; 698 KillOldBase = true; 699 } 700 if (BaseOpc == ARM::tADDrSPi) { 701 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4"); 702 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) 703 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4) 704 .addImm(Pred).addReg(PredReg); 705 } else 706 AddDefaultT1CC( 707 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true) 708 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) 709 .addImm(Pred).addReg(PredReg); 710 } else { 711 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) 712 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) 713 .addImm(Pred).addReg(PredReg).addReg(0); 714 } 715 Base = NewBase; 716 BaseKill = true; // New base is always killed straight away. 717 } 718 719 bool isDef = isLoadSingle(Opcode); 720 721 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with 722 // base register writeback. 723 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); 724 if (!Opcode) 725 return nullptr; 726 727 // Check if a Thumb1 LDM/STM merge is safe. This is the case if: 728 // - There is no writeback (LDM of base register), 729 // - the base register is killed by the merged instruction, 730 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS 731 // to reset the base register. 732 // Otherwise, don't merge. 
733 // It's safe to return here since the code to materialize a new base register 734 // above is also conditional on SafeToClobberCPSR. 735 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill) 736 return nullptr; 737 738 MachineInstrBuilder MIB; 739 740 if (Writeback) { 741 assert(isThumb1 && "expected Writeback only inThumb1"); 742 if (Opcode == ARM::tLDMIA) { 743 assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs"); 744 // Update tLDMIA with writeback if necessary. 745 Opcode = ARM::tLDMIA_UPD; 746 } 747 748 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); 749 750 // Thumb1: we might need to set base writeback when building the MI. 751 MIB.addReg(Base, getDefRegState(true)) 752 .addReg(Base, getKillRegState(BaseKill)); 753 754 // The base isn't dead after a merged instruction with writeback. 755 // Insert a sub instruction after the newly formed instruction to reset. 756 if (!BaseKill) 757 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg); 758 759 } else { 760 // No writeback, simply build the MachineInstr. 761 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); 762 MIB.addReg(Base, getKillRegState(BaseKill)); 763 } 764 765 MIB.addImm(Pred).addReg(PredReg); 766 767 for (const std::pair<unsigned, bool> &R : Regs) 768 MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second)); 769 770 return MIB.getInstr(); 771 } 772 773 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB, 774 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, 775 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, 776 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const { 777 bool IsLoad = isi32Load(Opcode); 778 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store"); 779 unsigned LoadStoreOpcode = IsLoad ? 
ARM::t2LDRDi8 : ARM::t2STRDi8; 780 781 assert(Regs.size() == 2); 782 MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL, 783 TII->get(LoadStoreOpcode)); 784 if (IsLoad) { 785 MIB.addReg(Regs[0].first, RegState::Define) 786 .addReg(Regs[1].first, RegState::Define); 787 } else { 788 MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second)) 789 .addReg(Regs[1].first, getKillRegState(Regs[1].second)); 790 } 791 MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 792 return MIB.getInstr(); 793 } 794 795 /// Call MergeOps and update MemOps and merges accordingly on success. 796 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { 797 const MachineInstr *First = Cand.Instrs.front(); 798 unsigned Opcode = First->getOpcode(); 799 bool IsLoad = isLoadSingle(Opcode); 800 SmallVector<std::pair<unsigned, bool>, 8> Regs; 801 SmallVector<unsigned, 4> ImpDefs; 802 DenseSet<unsigned> KilledRegs; 803 DenseSet<unsigned> UsedRegs; 804 // Determine list of registers and list of implicit super-register defs. 805 for (const MachineInstr *MI : Cand.Instrs) { 806 const MachineOperand &MO = getLoadStoreRegOp(*MI); 807 unsigned Reg = MO.getReg(); 808 bool IsKill = MO.isKill(); 809 if (IsKill) 810 KilledRegs.insert(Reg); 811 Regs.push_back(std::make_pair(Reg, IsKill)); 812 UsedRegs.insert(Reg); 813 814 if (IsLoad) { 815 // Collect any implicit defs of super-registers, after merging we can't 816 // be sure anymore that we properly preserved these live ranges and must 817 // removed these implicit operands. 818 for (const MachineOperand &MO : MI->implicit_operands()) { 819 if (!MO.isReg() || !MO.isDef() || MO.isDead()) 820 continue; 821 assert(MO.isImplicit()); 822 unsigned DefReg = MO.getReg(); 823 824 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end()) 825 continue; 826 // We can ignore cases where the super-reg is read and written. 
827 if (MI->readsRegister(DefReg)) 828 continue; 829 ImpDefs.push_back(DefReg); 830 } 831 } 832 } 833 834 // Attempt the merge. 835 typedef MachineBasicBlock::iterator iterator; 836 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx]; 837 iterator InsertBefore = std::next(iterator(LatestMI)); 838 MachineBasicBlock &MBB = *LatestMI->getParent(); 839 unsigned Offset = getMemoryOpOffset(First); 840 unsigned Base = getLoadStoreBaseOp(*First).getReg(); 841 bool BaseKill = LatestMI->killsRegister(Base); 842 unsigned PredReg = 0; 843 ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg); 844 DebugLoc DL = First->getDebugLoc(); 845 MachineInstr *Merged = nullptr; 846 if (Cand.CanMergeToLSDouble) 847 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill, 848 Opcode, Pred, PredReg, DL, Regs); 849 if (!Merged && Cand.CanMergeToLSMulti) 850 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill, 851 Opcode, Pred, PredReg, DL, Regs); 852 if (!Merged) 853 return nullptr; 854 855 // Determine earliest instruction that will get removed. We then keep an 856 // iterator just above it so the following erases don't invalidated it. 857 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]); 858 bool EarliestAtBegin = false; 859 if (EarliestI == MBB.begin()) { 860 EarliestAtBegin = true; 861 } else { 862 EarliestI = std::prev(EarliestI); 863 } 864 865 // Remove instructions which have been merged. 866 for (MachineInstr *MI : Cand.Instrs) 867 MBB.erase(MI); 868 869 // Determine range between the earliest removed instruction and the new one. 870 if (EarliestAtBegin) 871 EarliestI = MBB.begin(); 872 else 873 EarliestI = std::next(EarliestI); 874 auto FixupRange = make_range(EarliestI, iterator(Merged)); 875 876 if (isLoadSingle(Opcode)) { 877 // If the previous loads defined a super-reg, then we have to mark earlier 878 // operands undef; Replicate the super-reg def on the merged instruction. 
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          // A read of the (removed) super-reg def must now be marked undef; a
          // redefinition means no fixup is needed for this register any more.
          if (MO.readsReg())
            MO.setIsUndef();
          else if (MO.isDef())
            ImpDefReg = 0;
        }
      }
    }

    // Replicate the super-reg defs on the merged instruction.
    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: We are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (UsedRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    // Stores never carry implicit super-reg defs in this pass.
    assert(ImpDefs.empty());
  }

  return Merged;
}

/// Return true if \p Offset is encodable in a t2LDRDi8/t2STRDi8.
static bool isValidLSDoubleOffset(int Offset) {
  unsigned Value = abs(Offset);
  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
  // multiplied by 4.
  return (Value % 4) == 0 && Value < 1024;
}

/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    unsigned PReg = PMO.getReg();
    unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;
    bool CanMergeToLSDouble =
      STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On Swift, avoid vldm/vstm starting with an odd register number as that
    // needs more uops than single vldrs.
    if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;

    // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
    // deprecated; LDM to PC is fine but cannot happen here.
    if (PReg == ARM::SP || PReg == ARM::PC)
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // Merge following instructions where possible.
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      // Offsets must be consecutive (stride == access size).
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      if (Reg == ARM::SP || Reg == ARM::PC)
        break;

      // See if the current load/store may be part of a multi load/store.
      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
      bool PartOfLSMulti = CanMergeToLSMulti;
      if (PartOfLSMulti) {
        // Register numbers must be in ascending order.
        if (RegNum <= PRegNum)
          PartOfLSMulti = false;
        // For VFP / NEON load/store multiples, the registers must be
        // consecutive and within the limit on the number of registers per
        // instruction.
        else if (!isNotVFP && RegNum != PRegNum+1)
          PartOfLSMulti = false;
      }
      // See if the current load/store may be part of a double load/store.
      // (An LDRD/STRD transfers at most two registers, hence Count <= 1.)
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
        break;
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    // A single instruction has nothing to merge with.
    if (Count == 1)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}

/// Map a non-writeback load/store-multiple opcode and addressing submode to
/// the corresponding base-updating (_UPD) opcode.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// Check if the given instruction increments or decrements a register and
/// return the amount it is incremented/decremented. Returns 0 if the CPSR
/// flags generated by the instruction are possibly read as well.
static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
                                  ARMCC::CondCodes Pred, unsigned PredReg) {
  bool CheckCPSRDef;
  int Scale;
  switch (MI.getOpcode()) {
  // Thumb1 add/sub immediates are scaled by 4 here because their immediate
  // counts words for the SP-relative forms handled below.
  case ARM::tADDi8:  Scale =  4; CheckCPSRDef = true; break;
  case ARM::tSUBi8:  Scale = -4; CheckCPSRDef = true; break;
  case ARM::t2SUBri:
  case ARM::SUBri:   Scale = -1; CheckCPSRDef = true; break;
  case ARM::t2ADDri:
  case ARM::ADDri:   Scale =  1; CheckCPSRDef = true; break;
  case ARM::tADDspi: Scale =  4; CheckCPSRDef = false; break;
  case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
  default: return 0;
  }

  // Must be a same-register update (Reg = Reg +/- Imm) under the same
  // predicate as the memory op we want to fold it into.
  unsigned MIPredReg;
  if (MI.getOperand(0).getReg() != Reg ||
      MI.getOperand(1).getReg() != Reg ||
      getInstrPredicate(&MI, MIPredReg) != Pred ||
      MIPredReg != PredReg)
    return 0;

  if (CheckCPSRDef && definesCPSR(&MI))
    return 0;
  return MI.getOperand(2).getImm() * Scale;
}

/// Searches for an increment or decrement of \p Reg before \p MBBI.
/// Returns the block's end() iterator (and sets \p Offset to 0) on failure.
static MachineBasicBlock::iterator
findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
                 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (MBBI == BeginMBBI)
    return EndMBBI;

  // Skip debug values.
  MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
  while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
    --PrevMBBI;

  Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : PrevMBBI;
}

/// Searches for a increment or decrement of \p Reg after \p MBBI.
/// Returns the block's end() iterator (and sets \p Offset to 0) on failure.
static MachineBasicBlock::iterator
findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
                ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
  // Skip debug values.
  while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
    ++NextMBBI;
  if (NextMBBI == EndMBBI)
    return EndMBBI;

  Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : NextMBBI;
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  const MachineOperand &BaseOP = MI->getOperand(0);
  unsigned Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  // A decrement before the op flips the submode (ia->db, ib->da); otherwise
  // look for a matching increment/decrement after the op.
  if (Mode == ARM_AM::ia && Offset == -Bytes) {
    Mode = ARM_AM::db;
  } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
    Mode = ARM_AM::da;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
        ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
      return false;
  }
  MBB.erase(MergeInstr);

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

/// Return the pre-indexed opcode corresponding to non-indexed opcode \p Opc.
/// VLDR/VSTR have no pre-indexed form; a one-register VLDM/VSTM_UPD is used.
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// Return the post-indexed opcode corresponding to non-indexed opcode \p Opc.
/// VLDR/VSTR have no post-indexed form; a one-register VLDM/VSTM_UPD is used.
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only a zero existing offset can be folded with an inc/dec.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  unsigned NewOpc;
  // AM5 (VLDR/VSTR) can only fold a decrement before or an increment after,
  // since the VLDM/VSTM_UPD replacement only supports IA/DB submodes.
  if (!isAM5 && Offset == Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  } else if (Offset == -Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    } else if (!isAM5 && Offset == -Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;

  bool isLd = isLoadSingle(Opcode);
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
      }
    } else {
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
    } else {
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// Fold a preceding/trailing add/sub of the base register into a t2LDRDi8 or
/// t2STRDi8, producing the pre-/post-indexed t2LDRD/t2STRD form.
bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
         "Must have t2STRDi8 or t2LDRDi8");
  // Only a zero existing offset can be folded with an inc/dec.
  if (MI.getOperand(3).getImm() != 0)
    return false;

  // Behaviour for writeback is undefined if base register is the same as one
  // of the others.
  const MachineOperand &BaseOp = MI.getOperand(2);
  unsigned Base = BaseOp.getReg();
  const MachineOperand &Reg0Op = MI.getOperand(0);
  const MachineOperand &Reg1Op = MI.getOperand(1);
  if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
    return false;

  unsigned PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
  MachineBasicBlock::iterator MBBI(MI);
  MachineBasicBlock &MBB = *MI.getParent();
  int Offset;
  MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
                                                            PredReg, Offset);
  unsigned NewOpc;
  // A doubleword op transfers exactly 8 bytes; only a matching +/-8 update
  // can be folded.
  if (Offset == 8 || Offset == -8) {
    NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == 8 || Offset == -8) {
      NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  // Loads define the writeback base after the data regs; stores before them.
  if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
    MIB.addOperand(Reg0Op).addOperand(Reg1Op)
      .addReg(BaseOp.getReg(), RegState::Define);
  } else {
    assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
    MIB.addReg(BaseOp.getReg(), RegState::Define)
      .addOperand(Reg0Op).addOperand(Reg1Op);
  }
  MIB.addReg(BaseOp.getReg(), RegState::Kill)
    .addImm(Offset).addImm(Pred).addReg(PredReg);
  assert(TII->get(Opcode).getNumOperands() == 6 &&
         TII->get(NewOpc).getNumOperands() == 7 &&
         "Unexpected number of operands in Opcode specification.");

  // Transfer implicit operands.
  for (const MachineOperand &MO : MI.implicit_operands())
    MIB.addOperand(MO);
  MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());

  MBB.erase(MBBI);
  return true;
}

/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
static bool isMemoryOp(const MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  case ARM::VLDRS:
  case ARM::VSTRS:
  case ARM::VLDRD:
  case ARM::VSTRD:
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    break;
  default:
    return false;
  }
  if (!MI.getOperand(1).isReg())
    return false;

  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand &MMO = **MI.memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO.isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO.getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI.getOperand(1).isUndef())
    return false;

  return true;
}

/// Emit a single immediate-offset LDR or STR with the given operands and
/// register flags. Helper for splitting an illegal LDRD/STRD pair.
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc DL, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

/// Rewrite an LDRD/STRD whose register pair cannot be encoded (non-ascending /
/// non-consecutive registers, or Cortex-M3 errata 602117) into an equivalent
/// LDM/STM or a pair of LDR/STRs. Returns true if the instruction was changed.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg = MI->getOperand(1).getReg();
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  bool EvenDeadKill = isLd ?
    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill  = isLd ?
    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  int OffImm = getMemoryOpOffset(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it to a
    // ldm or stm.
    unsigned NewOpc = (isLd)
      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg,
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        .addReg(OddReg,
                getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and base register is killed, it may have been
    // re-defed by the load, make sure the first load does not clobber it.
    if (isLd &&
        (BaseKill || OffKill) &&
        (TRI->regsOverlap(EvenReg, BaseReg))) {
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      // Emit the odd (non-clobbering) half first.
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, false,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, false,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, EvenUndef,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, OddUndef,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  MBBI = MBB.erase(MBBI);
  return true;
}

/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
1634 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { 1635 MemOpQueue MemOps; 1636 unsigned CurrBase = 0; 1637 unsigned CurrOpc = ~0u; 1638 ARMCC::CondCodes CurrPred = ARMCC::AL; 1639 unsigned Position = 0; 1640 assert(Candidates.size() == 0); 1641 assert(MergeBaseCandidates.size() == 0); 1642 LiveRegsValid = false; 1643 1644 for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin(); 1645 I = MBBI) { 1646 // The instruction in front of the iterator is the one we look at. 1647 MBBI = std::prev(I); 1648 if (FixInvalidRegPairOp(MBB, MBBI)) 1649 continue; 1650 ++Position; 1651 1652 if (isMemoryOp(*MBBI)) { 1653 unsigned Opcode = MBBI->getOpcode(); 1654 const MachineOperand &MO = MBBI->getOperand(0); 1655 unsigned Reg = MO.getReg(); 1656 unsigned Base = getLoadStoreBaseOp(*MBBI).getReg(); 1657 unsigned PredReg = 0; 1658 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); 1659 int Offset = getMemoryOpOffset(MBBI); 1660 if (CurrBase == 0) { 1661 // Start of a new chain. 1662 CurrBase = Base; 1663 CurrOpc = Opcode; 1664 CurrPred = Pred; 1665 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position)); 1666 continue; 1667 } 1668 // Note: No need to match PredReg in the next if. 1669 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) { 1670 // Watch out for: 1671 // r4 := ldr [r0, #8] 1672 // r4 := ldr [r0, #4] 1673 // or 1674 // r0 := ldr [r0] 1675 // If a load overrides the base register or a register loaded by 1676 // another load in our chain, we cannot take this instruction. 1677 bool Overlap = false; 1678 if (isLoadSingle(Opcode)) { 1679 Overlap = (Base == Reg); 1680 if (!Overlap) { 1681 for (const MemOpQueueEntry &E : MemOps) { 1682 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) { 1683 Overlap = true; 1684 break; 1685 } 1686 } 1687 } 1688 } 1689 1690 if (!Overlap) { 1691 // Check offset and sort memory operation into the current chain. 
1692 if (Offset > MemOps.back().Offset) { 1693 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position)); 1694 continue; 1695 } else { 1696 MemOpQueue::iterator MI, ME; 1697 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) { 1698 if (Offset < MI->Offset) { 1699 // Found a place to insert. 1700 break; 1701 } 1702 if (Offset == MI->Offset) { 1703 // Collision, abort. 1704 MI = ME; 1705 break; 1706 } 1707 } 1708 if (MI != MemOps.end()) { 1709 MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position)); 1710 continue; 1711 } 1712 } 1713 } 1714 } 1715 1716 // Don't advance the iterator; The op will start a new chain next. 1717 MBBI = I; 1718 --Position; 1719 // Fallthrough to look into existing chain. 1720 } else if (MBBI->isDebugValue()) { 1721 continue; 1722 } else if (MBBI->getOpcode() == ARM::t2LDRDi8 || 1723 MBBI->getOpcode() == ARM::t2STRDi8) { 1724 // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions 1725 // remember them because we may still be able to merge add/sub into them. 1726 MergeBaseCandidates.push_back(MBBI); 1727 } 1728 1729 1730 // If we are here then the chain is broken; Extract candidates for a merge. 1731 if (MemOps.size() > 0) { 1732 FormCandidates(MemOps); 1733 // Reset for the next chain. 1734 CurrBase = 0; 1735 CurrOpc = ~0u; 1736 CurrPred = ARMCC::AL; 1737 MemOps.clear(); 1738 } 1739 } 1740 if (MemOps.size() > 0) 1741 FormCandidates(MemOps); 1742 1743 // Sort candidates so they get processed from end to begin of the basic 1744 // block later; This is necessary for liveness calculation. 1745 auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) { 1746 return M0->InsertPos < M1->InsertPos; 1747 }; 1748 std::sort(Candidates.begin(), Candidates.end(), LessThan); 1749 1750 // Go through list of candidates and merge. 
1751 bool Changed = false; 1752 for (const MergeCandidate *Candidate : Candidates) { 1753 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) { 1754 MachineInstr *Merged = MergeOpsUpdate(*Candidate); 1755 // Merge preceding/trailing base inc/dec into the merged op. 1756 if (Merged) { 1757 Changed = true; 1758 unsigned Opcode = Merged->getOpcode(); 1759 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8) 1760 MergeBaseUpdateLSDouble(*Merged); 1761 else 1762 MergeBaseUpdateLSMultiple(Merged); 1763 } else { 1764 for (MachineInstr *MI : Candidate->Instrs) { 1765 if (MergeBaseUpdateLoadStore(MI)) 1766 Changed = true; 1767 } 1768 } 1769 } else { 1770 assert(Candidate->Instrs.size() == 1); 1771 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front())) 1772 Changed = true; 1773 } 1774 } 1775 Candidates.clear(); 1776 // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt. 1777 for (MachineInstr *MI : MergeBaseCandidates) 1778 MergeBaseUpdateLSDouble(*MI); 1779 MergeBaseCandidates.clear(); 1780 1781 return Changed; 1782 } 1783 1784 /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr") 1785 /// into the preceding stack restore so it directly restore the value of LR 1786 /// into pc. 1787 /// ldmfd sp!, {..., lr} 1788 /// bx lr 1789 /// or 1790 /// ldmfd sp!, {..., lr} 1791 /// mov pc, lr 1792 /// => 1793 /// ldmfd sp!, {..., pc} 1794 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { 1795 // Thumb1 LDM doesn't allow high registers. 1796 if (isThumb1) return false; 1797 if (MBB.empty()) return false; 1798 1799 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 1800 if (MBBI != MBB.begin() && 1801 (MBBI->getOpcode() == ARM::BX_RET || 1802 MBBI->getOpcode() == ARM::tBX_RET || 1803 MBBI->getOpcode() == ARM::MOVPCLR)) { 1804 MachineBasicBlock::iterator PrevI = std::prev(MBBI); 1805 // Ignore any DBG_VALUE instructions. 
    while (PrevI->isDebugValue() && PrevI != MBB.begin())
      --PrevI;
    MachineInstr *PrevMI = PrevI;
    unsigned Opcode = PrevMI->getOpcode();
    // Only a writeback (..._UPD) multiple-load can absorb the return.
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      // The last operand must be LR; it is rewritten to PC below.
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      // Turn the LDM into a return-form LDM loading straight into PC, and
      // delete the now-redundant return instruction.
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

/// Fold a tMOVr that defines LR followed by tBX_RET into a single tBX
/// through the register the tMOVr kills.
bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  if (MBBI == MBB.begin() || MBBI == MBB.end() ||
      MBBI->getOpcode() != ARM::tBX_RET)
    return false;

  MachineBasicBlock::iterator Prev = MBBI;
  --Prev;
  if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
    return false;

  // Branch directly through the killed source register of the tMOVr.
  for (auto Use : Prev->uses())
    if (Use.isKill()) {
      AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
                         .addReg(Use.getReg(), RegState::Kill))
          .copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      MBB.erase(Prev);
      return true;
    }

  llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  // Cache function-level and subtarget info used throughout the pass.
  MF = &Fn;
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();

  RegClassInfoValid = false;
  isThumb2
           = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    // Return merging is only attempted when the subtarget has v5T ops.
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
    // The mov/bx fold only applies to Thumb1 code.
    if (isThumb1)
      Modified |= CombineMovBx(MBB);
  }

  Allocator.DestroyAll();
  return Modified;
}

namespace llvm {
void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
}

#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
  "ARM pre- register allocation load / store optimization pass"

namespace {
  /// Pre- register allocation pass that moves loads / stores from consecutive
  /// locations closer together to make it more likely they will be combined
  /// later.
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {
      initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry());
    }

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char
ARMPreAllocLoadStoreOpt::ID = 0;
}

INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt",
                ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  // Cache function-level and subtarget info, then process each block.
  TD = &Fn.getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineBasicBlock &MFI : Fn)
    Modified |= RescheduleLoadStoreInstrs(&MFI);

  return Modified;
}

/// Check whether it is safe and profitable to move the load/store group
/// \p MemOps (transfer registers \p MemRegs, base register \p Base) across
/// the instructions strictly between \p I and \p E.
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    // Calls, terminators and side-effecting instructions are hard barriers.
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      // A redefinition of the base register makes the move unsafe.
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      // Registers live across the moved region add register pressure.
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  // Allocate one combined array in the MachineFunction and copy both
  // memoperand lists into it, Op0's first.
  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

/// Decide whether the access pair \p Op0 / \p Op1 can be combined into a
/// single LDRD/STRD, and if so compute the new opcode, transfer registers,
/// base register, encoded offset, predicate and debug location.
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl, unsigned &NewOpc,
                                          unsigned &FirstReg,
                                          unsigned &SecondReg,
                                          unsigned &BaseReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    // Unsupported opcode: no pairing.
    return false;
  }

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    // Thumb2 LDRD/STRD takes a signed scaled immediate directly.
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    // ARM mode encodes the offset as addrmode3: add/sub flag + magnitude.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  FirstReg = Op0->getOperand(0).getReg();
  SecondReg = Op1->getOperand(0).getReg();
  // The two transfer registers must be distinct.
  if (FirstReg == SecondReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

/// Reschedule the loads / stores in \p Ops (all using base register \p Base)
/// so that consecutive accesses end up adjacent, possibly forming LDRD/STRD
/// pairs. Returns true if anything was moved or rewritten.
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    // Callers never insert two ops with the same offset for one base.
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Ops is sorted by decreasing offset, so walk it backwards to visit
    // increasing offsets; track first/last instruction in program order.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      // All ops in a run must map to the same load/store-multiple opcode.
      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      // Stop the run at the first gap or transfer-size change.
      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      // Collect the run's instructions and transfer registers for the
      // safety/profitability check.
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        // Not safe/profitable: drop this run and retry with the rest.
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        // Skip over the run's own instructions and debug values.
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Constrain both transfer registers to the class LDRD/STRD needs.
          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          // The new pair instruction replaces both originals.
          MBB->erase(Op0);
          MBB->erase(Op1);

          if (!isT2) {
            // Add register allocation hints to form register pairs.
            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
          }
        } else {
          // No pairing: just splice each instruction to the insertion point.
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

/// Collect the loads and stores of \p MBB per base register and try to
/// reschedule each group, restarting after call/terminator barriers.
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      // Number the real (non-debug) instructions; the numbering is later
      // used to measure the distance between ops in a run.
      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(*MI))
        continue;
      // Only unconditional ops are rescheduled.
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isLoadSingle(Opc);
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          // A repeated base+offset ends the current region (see below).
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      // More instructions follow the barrier/duplicate: reset the per-region
      // state and continue with a fresh region.
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}