//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed, "Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed, "Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
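///
/// For example:
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// =>
///   ldmia r4, {r0, r1, r2}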

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
    break;
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
    break;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
    break;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
    break;
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
    break;
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
    break;
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
    break;
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
    break;
  }

  return 0;
}

namespace llvm {
namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }

  return ARM_AM::bad_am_submode;
}

} // end namespace ARM_AM
} // end namespace llvm

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
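  // The submode is implied by the starting offset: e.g. for a 3-register i32
  // merge, offset 4 selects IB (base+4..base+12), offset -8 selects DA
  // (base-8..base), and offset -12 selects DB (base-12..base-4). Any other
  // non-zero offset requires materializing a new base register below.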
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If the starting offset isn't zero, insert an MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = -Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
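  // If an instruction that stays behind (before the insertion point) currently
  // carries the kill of a register being merged, that kill flag must migrate
  // to the merged instruction, which becomes the register's last use.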
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
                                   : getARMRegisterNumbering(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limits are 32 for S variants, 16 for D variants.
  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
                                   : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order.
    // For VFP / NEON load and store multiples, the registers must also be
    // consecutive and within the limit on the number of registers per
    // instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}

static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
    break;
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
    break;
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
    break;
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
    break;
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
    break;
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
    break;
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
    break;
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
    break;
  }

  return 0;
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of the operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE;
  case ARM::STRi12:
    return ARM::STR_PRE;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST;
  case ARM::STRi12:
    return ARM::STR_POST;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
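  // A trailing increment folds into a post-indexed form, e.g.:
  //   ldr r1, [r0]
  //   add r0, r0, #4
  // =>
  //   ldr r1, [r0], #4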
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned Offset = 0;
  if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else if (!isAM5)
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
  case ARM::VLDRD:
  case ARM::VSTRD:
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
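    // ARM ldrd / strd need an even/odd consecutive register pair (e.g.
    // r0/r1). If register allocation already produced such a pair, there is
    // nothing to fix here.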
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defined by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
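                // The queue already holds a memop at this offset; stop
                // scanning and let the TryMerge logic below flush what has
                // been collected so far.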
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those loads/stores
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
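  // Without alias info, any intervening store conflicts with loads being
  // moved, any intervening load or store conflicts with stores being moved,
  // and calls, terminators, and instructions with unmodeled side effects
  // always stop the scan.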
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    const MCInstrDesc &MCID = I->getDesc();
    if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && MCID.mayStore())
      return false;
    if (!isLd) {
      if (MCID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (MCID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
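  // AM3 (ldrd / strd) encodes an unscaled 8-bit immediate, while t2LDRDi8 /
  // t2STRDi8 take an 8-bit immediate scaled by 4, hence the Scale factor in
  // the range checks below.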
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const MCInstrDesc &MCID = MI->getDesc();
      if (MCID.isCall() || MCID.isTerminator()) {
        // Stop at barriers.
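        // Memory operations can't be rescheduled across calls or terminators,
        // so process what has been collected so far and resume scanning after
        // the barrier.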
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that was seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load /
/// store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}