//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the Base ARM implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "ARMBaseInstrInfo.h"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/STLExtras.h"

#define GET_INSTRINFO_CTOR
#include "ARMGenInstrInfo.inc"

using namespace llvm;

static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
               cl::desc("Enable ARM 2-addr to 3-addr conv"));

/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
  unsigned MLxOpc;     // MLA / MLS opcode
  unsigned MulOpc;     // Expanded multiplication opcode
  unsigned AddSubOpc;  // Expanded add / sub opcode
  bool NegAcc;         // True if the acc is negated before the add / sub.
  bool HasLane;        // True if instruction has an extra "lane" operand.
};

static const ARM_MLxEntry ARM_MLxTable[] = {
  // MLxOpc,          MulOpc,           AddSubOpc,      NegAcc, HasLane
  // fp scalar ops
  { ARM::VMLAS,       ARM::VMULS,       ARM::VADDS,     false,  false },
  { ARM::VMLSS,       ARM::VMULS,       ARM::VSUBS,     false,  false },
  { ARM::VMLAD,       ARM::VMULD,       ARM::VADDD,     false,  false },
  { ARM::VMLSD,       ARM::VMULD,       ARM::VSUBD,     false,  false },
  { ARM::VNMLAS,      ARM::VNMULS,      ARM::VSUBS,     true,   false },
  { ARM::VNMLSS,      ARM::VMULS,       ARM::VSUBS,     true,   false },
  { ARM::VNMLAD,      ARM::VNMULD,      ARM::VSUBD,     true,   false },
  { ARM::VNMLSD,      ARM::VMULD,       ARM::VSUBD,     true,   false },

  // fp SIMD ops
  { ARM::VMLAfd,      ARM::VMULfd,      ARM::VADDfd,    false,  false },
  { ARM::VMLSfd,      ARM::VMULfd,      ARM::VSUBfd,    false,  false },
  { ARM::VMLAfq,      ARM::VMULfq,      ARM::VADDfq,    false,  false },
  { ARM::VMLSfq,      ARM::VMULfq,      ARM::VSUBfq,    false,  false },
  { ARM::VMLAslfd,    ARM::VMULslfd,    ARM::VADDfd,    false,  true  },
  { ARM::VMLSslfd,    ARM::VMULslfd,    ARM::VSUBfd,    false,  true  },
  { ARM::VMLAslfq,    ARM::VMULslfq,    ARM::VADDfq,    false,  true  },
  { ARM::VMLSslfq,    ARM::VMULslfq,    ARM::VSUBfq,    false,  true  },
};

ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
  : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
    Subtarget(STI) {
  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
    if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
      assert(false && "Duplicated entries?");
    MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
    MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
  }
}

// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
// currently defaults to no prepass hazard recognizer.
ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetHazardRecognizer(const TargetMachine *TM,
                             const ScheduleDAG *DAG) const {
  if (usePreRAHazardRecognizer()) {
    const InstrItineraryData *II = TM->getInstrItineraryData();
    return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
  }
  return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
}

ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                   const ScheduleDAG *DAG) const {
  if (Subtarget.isThumb2() || Subtarget.hasVFP2())
    return (ScheduleHazardRecognizer *)
      new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG);
  return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG);
}

MachineInstr *
ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                        MachineBasicBlock::iterator &MBBI,
                                        LiveVariables *LV) const {
  // FIXME: Thumb2 support.

  if (!EnableARM3Addr)
    return NULL;

  MachineInstr *MI = MBBI;
  MachineFunction &MF = *MI->getParent()->getParent();
  uint64_t TSFlags = MI->getDesc().TSFlags;
  bool isPre = false;
  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
  default: return NULL;
  case ARMII::IndexModePre:
    isPre = true;
    break;
  case ARMII::IndexModePost:
    break;
  }

  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
  // operation.
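  // For example (an illustrative case, not the only form handled here): a
  // post-indexed "ldr r0, [r1], #4" becomes "ldr r0, [r1]" followed by
  // "add r1, r1, #4", while for a pre-indexed access the add/sub is emitted
  // before the unindexed load/store so the updated base can be used.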
  unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
  if (MemOpc == 0)
    return NULL;

  MachineInstr *UpdateMI = NULL;
  MachineInstr *MemMI = NULL;
  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
  const MCInstrDesc &MCID = MI->getDesc();
  unsigned NumOps = MCID.getNumOperands();
  bool isLoad = !MCID.mayStore();
  const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
  const MachineOperand &Base = MI->getOperand(2);
  const MachineOperand &Offset = MI->getOperand(NumOps-3);
  unsigned WBReg = WB.getReg();
  unsigned BaseReg = Base.getReg();
  unsigned OffReg = Offset.getReg();
  unsigned OffImm = MI->getOperand(NumOps-2).getImm();
  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
  switch (AddrMode) {
  default:
    assert(false && "Unknown indexed op!");
    return NULL;
  case ARMII::AddrMode2: {
    bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM2Offset(OffImm);
    if (OffReg == 0) {
      if (ARM_AM::getSOImmVal(Amt) == -1)
        // Can't encode it in a so_imm operand. This transformation will
        // add more than 1 instruction. Abandon!
        return NULL;
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    } else if (Amt != 0) {
      ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
      unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg)
        .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
        .addImm(Pred).addReg(0).addReg(0);
    } else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  case ARMII::AddrMode3 : {
    bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM3Offset(OffImm);
    if (OffReg == 0)
      // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  }

  std::vector<MachineInstr*> NewMIs;
  if (isPre) {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(WBReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
    NewMIs.push_back(MemMI);
    NewMIs.push_back(UpdateMI);
  } else {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(BaseReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
    if (WB.isDead())
      UpdateMI->getOperand(0).setIsDead();
    NewMIs.push_back(UpdateMI);
    NewMIs.push_back(MemMI);
  }

  // Transfer LiveVariables states, kill / dead info.
  if (LV) {
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
        unsigned Reg = MO.getReg();

        LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
        if (MO.isDef()) {
          MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
          if (MO.isDead())
            LV->addVirtualRegisterDead(Reg, NewMI);
        }
        if (MO.isUse() && MO.isKill()) {
          for (unsigned j = 0; j < 2; ++j) {
            // Look at the two new MI's in reverse order.
            MachineInstr *NewMI = NewMIs[j];
            if (!NewMI->readsRegister(Reg))
              continue;
            LV->addVirtualRegisterKilled(Reg, NewMI);
            if (VI.removeKill(MI))
              VI.Kills.push_back(NewMI);
            break;
          }
        }
      }
    }
  }

  MFI->insert(MBBI, NewMIs[1]);
  MFI->insert(MBBI, NewMIs[0]);
  return NewMIs[0];
}

// Branch analysis.
bool
ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      TBB = LastInst->getOperand(0).getMBB();
      Cond.push_back(LastInst->getOperand(1));
      Cond.push_back(LastInst->getOperand(2));
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    Cond.push_back(SecondLastInst->getOperand(1));
    Cond.push_back(SecondLastInst->getOperand(2));
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with a branch table followed by an unconditional
  // branch. The branch folder can create these, and we must get rid of them
  // for correctness of Thumb constant islands.
  if ((isJumpTableBranchOpcode(SecondLastOpc) ||
       isIndirectBranchOpcode(SecondLastOpc)) &&
      isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}


unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin()) return 0;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return 0;
    --I;
  }
  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) return 1;
  --I;
  if (!isCondBranchOpcode(I->getOpcode()))
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}

unsigned
ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                               MachineBasicBlock *FBB,
                               const SmallVectorImpl<MachineOperand> &Cond,
                               DebugLoc DL) const {
  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
  int BOpc = !AFI->isThumbFunction()
    ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
  int BccOpc = !AFI->isThumbFunction()
    ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);

  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0) &&
         "ARM branch conditions have two components!");

  if (FBB == 0) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
    else
      BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
        .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
    return 1;
  }

  // Two-way conditional branch.
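  // Emit "Bcc TBB" followed by an unconditional "B FBB", using the
  // ARM / Thumb / Thumb2 opcodes selected above.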
  BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
    .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
  return 2;
}

bool ARMBaseInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
  return false;
}

bool ARMBaseInstrInfo::
PredicateInstruction(MachineInstr *MI,
                     const SmallVectorImpl<MachineOperand> &Pred) const {
  unsigned Opc = MI->getOpcode();
  if (isUncondBranchOpcode(Opc)) {
    MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
    MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
    MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));
    return true;
  }

  int PIdx = MI->findFirstPredOperandIdx();
  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setImm(Pred[0].getImm());
    MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
    return true;
  }
  return false;
}

bool ARMBaseInstrInfo::
SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                  const SmallVectorImpl<MachineOperand> &Pred2) const {
  if (Pred1.size() > 2 || Pred2.size() > 2)
    return false;

  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
  if (CC1 == CC2)
    return true;

  switch (CC1) {
  default:
    return false;
  case ARMCC::AL:
    return true;
  case ARMCC::HS:
    return CC2 == ARMCC::HI;
  case ARMCC::LS:
    return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
  case ARMCC::GE:
    return CC2 == ARMCC::GT;
  case ARMCC::LE:
    return CC2 == ARMCC::LT;
  }
}

bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
                                    std::vector<MachineOperand> &Pred) const {
  // FIXME: This confuses implicit_def with optional CPSR def.
  const MCInstrDesc &MCID = MI->getDesc();
  if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef())
    return false;

  bool Found = false;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.getReg() == ARM::CPSR) {
      Pred.push_back(MO);
      Found = true;
    }
  }

  return Found;
}

/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
  const MCInstrDesc &MCID = MI->getDesc();
  if (!MCID.isPredicable())
    return false;

  if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
    ARMFunctionInfo *AFI =
      MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
    return AFI->isThumb2Function();
  }
  return true;
}

/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
LLVM_ATTRIBUTE_NOINLINE
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI);
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI) {
  assert(JTI < JT.size());
  return JT[JTI].MBBs.size();
}

/// GetInstSize - Return the size of the specified MachineInstr.
///
unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  const MachineBasicBlock &MBB = *MI->getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  const MCInstrDesc &MCID = MI->getDesc();
  if (MCID.getSize())
    return MCID.getSize();

  // If this machine instr is an inline asm, measure it.
  if (MI->getOpcode() == ARM::INLINEASM)
    return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
  if (MI->isLabel())
    return 0;
  unsigned Opc = MI->getOpcode();
  switch (Opc) {
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::PROLOG_LABEL:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::DBG_VALUE:
    return 0;
  case ARM::MOVi16_ga_pcrel:
  case ARM::MOVTi16_ga_pcrel:
  case ARM::t2MOVi16_ga_pcrel:
  case ARM::t2MOVTi16_ga_pcrel:
    return 4;
  case ARM::MOVi32imm:
  case ARM::t2MOVi32imm:
    return 8;
  case ARM::CONSTPOOL_ENTRY:
    // If this machine instr is a constant pool entry, its size is recorded as
    // operand #2.
    return MI->getOperand(2).getImm();
  case ARM::Int_eh_sjlj_longjmp:
    return 16;
  case ARM::tInt_eh_sjlj_longjmp:
    return 10;
  case ARM::Int_eh_sjlj_setjmp:
  case ARM::Int_eh_sjlj_setjmp_nofp:
    return 20;
  case ARM::tInt_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp_nofp:
    return 12;
  case ARM::BR_JTr:
  case ARM::BR_JTm:
  case ARM::BR_JTadd:
  case ARM::tBR_JTr:
  case ARM::t2BR_JT:
  case ARM::t2TBB_JT:
  case ARM::t2TBH_JT: {
    // These are jumptable branches, i.e. a branch followed by an inlined
    // jumptable. The size is 4 + 4 * number of entries. For TBB, each
    // entry is one byte; for TBH, each entry is two bytes.
    unsigned EntrySize = (Opc == ARM::t2TBB_JT)
      ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
    unsigned NumOps = MCID.getNumOperands();
    MachineOperand JTOP =
      MI->getOperand(NumOps - (MCID.isPredicable() ? 3 : 2));
    unsigned JTI = JTOP.getIndex();
    const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
    assert(MJTI != 0);
    const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
    assert(JTI < JT.size());
    // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
    // aligned. The assembler / linker may add 2 byte padding just before
    // the JT entries. The size does not include this padding; the
    // constant islands pass does separate bookkeeping for it.
    // FIXME: If we know the size of the function is less than (1 << 16) * 2
    // bytes, we can use 16-bit entries instead. Then there won't be an
    // alignment issue.
    unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
    unsigned NumEntries = getNumJTEntries(JT, JTI);
    if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
      // Make sure the instruction that follows TBB is 2-byte aligned.
      // FIXME: Constant island pass should insert an "ALIGN" instruction
      // instead.
      ++NumEntries;
    return NumEntries * EntrySize + InstSize;
  }
  default:
    // Otherwise, pseudo-instruction sizes are zero.
    return 0;
  }
  return 0; // Not reached
}

void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);

  if (GPRDest && GPRSrc) {
    AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
                                  .addReg(SrcReg, getKillRegState(KillSrc))));
    return;
  }

  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);

  unsigned Opc;
  if (SPRDest && SPRSrc)
    Opc = ARM::VMOVS;
  else if (GPRDest && SPRSrc)
    Opc = ARM::VMOVRS;
  else if (SPRDest && GPRSrc)
    Opc = ARM::VMOVSR;
  else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD;
  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VORRq;
  else if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVQQ;
  else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVQQQQ;
  else
    llvm_unreachable("Impossible reg-to-reg copy");

  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
  MIB.addReg(SrcReg, getKillRegState(KillSrc));
  if (Opc == ARM::VORRq)
    MIB.addReg(SrcReg, getKillRegState(KillSrc));
  if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ)
    AddDefaultPred(MIB);
}

static const
MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
                             unsigned Reg, unsigned SubIdx, unsigned State,
                             const TargetRegisterInfo *TRI) {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

void ARMBaseInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    unsigned SrcReg, bool isKill, int FI,
                    const TargetRegisterClass *RC,
                    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachineMemOperand *MMO =
    MF.getMachineMemOperand(MachinePointerInfo(
                                        PseudoSourceValue::getFixedStack(FI)),
                            MachineMemOperand::MOStore,
                            MFI.getObjectSize(FI),
                            Align);

  // tGPR is used sometimes in ARM instructions that need to avoid using
  // certain registers. Just treat it as GPR here. Likewise, rGPR.
  if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
      || RC == ARM::rGPRRegisterClass)
    RC = ARM::GPRRegisterClass;

  switch (RC->getID()) {
  case ARM::GPRRegClassID:
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::SPRRegClassID:
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::DPRRegClassID:
  case ARM::DPR_VFP2RegClassID:
  case ARM::DPR_8RegClassID:
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::QPRRegClassID:
  case ARM::QPR_VFP2RegClassID:
  case ARM::QPR_8RegClassID:
    if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
    } else {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addFrameIndex(FI)
                     .addMemOperand(MMO));
    }
    break;
  case ARM::QQPRRegClassID:
  case ARM::QQPR_VFP2RegClassID:
    if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
      // FIXME: It's possible to only store part of the QQ register if the
      // spilled def has a sub-register index.
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
    } else {
      MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                       .addFrameIndex(FI))
                       .addMemOperand(MMO);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
      AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
    }
    break;
  case ARM::QQQQPRRegClassID: {
    MachineInstrBuilder MIB =
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                     .addFrameIndex(FI))
                     .addMemOperand(MMO);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
    AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
    break;
  }
  default:
    llvm_unreachable("Unknown regclass!");
  }
}

unsigned
ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                     int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::STRrs:
  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
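    // A register-offset store only refers directly to a frame slot when both
    // the offset register and the shift immediate are zero.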
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isReg() &&
        MI->getOperand(3).isImm() &&
        MI->getOperand(2).getReg() == 0 &&
        MI->getOperand(3).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::STRi12:
  case ARM::t2STRi12:
  case ARM::tSTRspi:
  case ARM::VSTRD:
  case ARM::VSTRS:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VST1q64Pseudo:
    if (MI->getOperand(0).isFI() &&
        MI->getOperand(2).getSubReg() == 0) {
      FrameIndex = MI->getOperand(0).getIndex();
      return MI->getOperand(2).getReg();
    }
    break;
  case ARM::VSTMQIA:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned DestReg, int FI,
                     const TargetRegisterClass *RC,
                     const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(
                    MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
                            MachineMemOperand::MOLoad,
                            MFI.getObjectSize(FI),
                            Align);

  // tGPR is used sometimes in ARM instructions that need to avoid using
  // certain registers. Just treat it as GPR here.
  if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
      || RC == ARM::rGPRRegisterClass)
    RC = ARM::GPRRegisterClass;

  switch (RC->getID()) {
  case ARM::GPRRegClassID:
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::SPRRegClassID:
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::DPRRegClassID:
  case ARM::DPR_VFP2RegClassID:
  case ARM::DPR_8RegClassID:
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::QPRRegClassID:
  case ARM::QPR_VFP2RegClassID:
  case ARM::QPR_8RegClassID:
    if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
    } else {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
                     .addFrameIndex(FI)
                     .addMemOperand(MMO));
    }
    break;
  case ARM::QQPRRegClassID:
  case ARM::QQPR_VFP2RegClassID:
    if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
    } else {
      MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                       .addFrameIndex(FI))
                       .addMemOperand(MMO);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
      AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
    }
    break;
  case ARM::QQQQPRRegClassID: {
    MachineInstrBuilder MIB =
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                     .addFrameIndex(FI))
                     .addMemOperand(MMO);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
    AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
    break;
  }
  default:
    llvm_unreachable("Unknown regclass!");
  }
}

unsigned
ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                      int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::LDRrs:
  case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isReg() &&
        MI->getOperand(3).isImm() &&
        MI->getOperand(2).getReg() == 0 &&
        MI->getOperand(3).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::LDRi12:
  case ARM::t2LDRi12:
  case ARM::tLDRspi:
  case ARM::VLDRD:
  case ARM::VLDRS:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VLD1q64Pseudo:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VLDMQIA:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

MachineInstr*
ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
                                           int FrameIx, uint64_t Offset,
                                           const MDNode *MDPtr,
                                           DebugLoc DL) const {
  MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE))
    .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
  return &*MIB;
}

/// Create a copy of a const pool value. Update CPI to the new index and return
/// the label UID.
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
  MachineConstantPool *MCP = MF.getConstantPool();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
  assert(MCPE.isMachineConstantPoolEntry() &&
         "Expecting a machine constantpool entry!");
  ARMConstantPoolValue *ACPV =
    static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);

  unsigned PCLabelId = AFI->createPICLabelUId();
  ARMConstantPoolValue *NewCPV = 0;
  // FIXME: The below assumes PIC relocation model and that the function
  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
  // zero for non-PIC in ARM or Thumb. The callers are all Thumb LDR
  // instructions, so that's probably OK, but is PIC always correct when
  // we get here?
  if (ACPV->isGlobalValue())
    NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId,
                                      ARMCP::CPValue, 4);
  else if (ACPV->isExtSymbol())
    NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(),
                                      ACPV->getSymbol(), PCLabelId, 4);
  else if (ACPV->isBlockAddress())
    NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId,
                                      ARMCP::CPBlockAddress, 4);
  else if (ACPV->isLSDA())
    NewCPV = new ARMConstantPoolValue(MF.getFunction(), PCLabelId,
                                      ARMCP::CPLSDA, 4);
  else
    llvm_unreachable("Unexpected ARM constantpool value type!!");
  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
  return PCLabelId;
}

void ARMBaseInstrInfo::
reMaterialize(MachineBasicBlock &MBB,
              MachineBasicBlock::iterator I,
              unsigned DestReg, unsigned SubIdx,
              const MachineInstr *Orig,
              const TargetRegisterInfo &TRI) const {
  unsigned Opcode = Orig->getOpcode();
  switch (Opcode) {
  default: {
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
    MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
    MBB.insert(I, MI);
    break;
  }
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    MachineFunction &MF = *MBB.getParent();
    unsigned CPI = Orig->getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
                                      DestReg)
      .addConstantPoolIndex(CPI).addImm(PCLabelId);
    MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
    break;
  }
  }
}

MachineInstr *
ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
  MachineInstr *MI = TargetInstrInfoImpl::duplicate(Orig, MF);
  switch(Orig->getOpcode()) {
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    unsigned CPI = Orig->getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    Orig->getOperand(1).setIndex(CPI);
    Orig->getOperand(2).setImm(PCLabelId);
    break;
  }
  }
  return MI;
}

bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
                                        const MachineInstr *MI1,
                                        const MachineRegisterInfo *MRI) const {
  int Opcode = MI0->getOpcode();
  if (Opcode == ARM::t2LDRpci ||
      Opcode == ARM::t2LDRpci_pic ||
      Opcode == ARM::tLDRpci ||
      Opcode == ARM::tLDRpci_pic ||
      Opcode == ARM::MOV_ga_dyn ||
      Opcode == ARM::MOV_ga_pcrel ||
      Opcode == ARM::MOV_ga_pcrel_ldr ||
      Opcode == ARM::t2MOV_ga_dyn ||
      Opcode == ARM::t2MOV_ga_pcrel) {
    if (MI1->getOpcode() != Opcode)
      return false;
    if (MI0->getNumOperands() != MI1->getNumOperands())
      return false;

    const MachineOperand &MO0 = MI0->getOperand(1);
    const MachineOperand &MO1 = MI1->getOperand(1);
    if (MO0.getOffset() != MO1.getOffset())
      return false;

    if (Opcode == ARM::MOV_ga_dyn ||
        Opcode == ARM::MOV_ga_pcrel ||
        Opcode == ARM::MOV_ga_pcrel_ldr ||
        Opcode == ARM::t2MOV_ga_dyn ||
        Opcode == ARM::t2MOV_ga_pcrel)
      // Ignore the PC labels.
      return MO0.getGlobal() == MO1.getGlobal();

    const MachineFunction *MF = MI0->getParent()->getParent();
    const MachineConstantPool *MCP = MF->getConstantPool();
    int CPI0 = MO0.getIndex();
    int CPI1 = MO1.getIndex();
    const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
    const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
    bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
    bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
    if (isARMCP0 && isARMCP1) {
      ARMConstantPoolValue *ACPV0 =
        static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
      ARMConstantPoolValue *ACPV1 =
        static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
      return ACPV0->hasSameValue(ACPV1);
    } else if (!isARMCP0 && !isARMCP1) {
      return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
    }
    return false;
  } else if (Opcode == ARM::PICLDR) {
    if (MI1->getOpcode() != Opcode)
      return false;
    if (MI0->getNumOperands() != MI1->getNumOperands())
      return false;

    unsigned Addr0 = MI0->getOperand(1).getReg();
    unsigned Addr1 = MI1->getOperand(1).getReg();
    if (Addr0 != Addr1) {
      if (!MRI ||
          !TargetRegisterInfo::isVirtualRegister(Addr0) ||
          !TargetRegisterInfo::isVirtualRegister(Addr1))
        return false;

      // This assumes SSA form.
      MachineInstr *Def0 = MRI->getVRegDef(Addr0);
      MachineInstr *Def1 = MRI->getVRegDef(Addr1);
      // Check if the loaded values, e.g. a constantpool of a global address,
      // are the same.
      if (!produceSameValue(Def0, Def1, MRI))
        return false;
    }

    for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
      // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
      const MachineOperand &MO0 = MI0->getOperand(i);
      const MachineOperand &MO1 = MI1->getOperand(i);
      if (!MO0.isIdenticalTo(MO1))
        return false;
    }
    return true;
  }

  return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}

/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
/// determine if two loads are loading from the same base address. It should
/// only return true if the base pointers are the same and the only difference
/// between the two addresses is the offset. It also returns the offsets by
/// reference.
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
                                               int64_t &Offset1,
                                               int64_t &Offset2) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
    return false;

  switch (Load1->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRDi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRSHi12:
    break;
  }

  switch (Load2->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRDi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRSHi12:
    break;
  }

  // Check if base addresses and chain operands match.
  if (Load1->getOperand(0) != Load2->getOperand(0) ||
      Load1->getOperand(4) != Load2->getOperand(4))
    return false;

  // Index should be Reg0.
  if (Load1->getOperand(3) != Load2->getOperand(3))
    return false;

  // Determine the offsets.
  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
      isa<ConstantSDNode>(Load2->getOperand(1))) {
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
    Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
    return true;
  }

  return false;
}

/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
/// be scheduled together. On some targets if two loads are loading from
/// addresses in the same cache line, it's better if they are scheduled
/// together. This function takes two integers that represent the load offsets
/// from the common base address. It returns true if it decides it's desirable
/// to schedule the two loads together. "NumLoads" is the number of loads that
/// have already been scheduled after Load1.
bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                               int64_t Offset1, int64_t Offset2,
                                               unsigned NumLoads) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  assert(Offset2 > Offset1);

  if ((Offset2 - Offset1) / 8 > 64)
    return false;

  if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
    return false; // FIXME: overly conservative?

  // Four loads in a row should be sufficient.
  if (NumLoads >= 3)
    return false;

  return true;
}

bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
                                            const MachineBasicBlock *MBB,
                                            const MachineFunction &MF) const {
  // Debug info is never a scheduling boundary. It's necessary to be explicit
  // due to the special treatment of IT instructions below, otherwise a
  // dbg_value followed by an IT will result in the IT instruction being
  // considered a scheduling hazard, which is wrong. It should be the actual
  // instruction preceding the dbg_value instruction(s), just like it is
  // when debug info is not present.
  if (MI->isDebugValue())
    return false;

  // Terminators and labels can't be scheduled around.
  if (MI->getDesc().isTerminator() || MI->isLabel())
    return true;

  // Treat the start of the IT block as a scheduling boundary, but schedule
  // t2IT along with all instructions following it.
  // FIXME: This is a big hammer. But the alternative is to add all potential
  // true and anti dependencies to IT block instructions as implicit operands
  // to the t2IT instruction. The added compile time and complexity does not
  // seem worth it.
  MachineBasicBlock::const_iterator I = MI;
  // Make sure to skip any dbg_value instructions.
  while (++I != MBB->end() && I->isDebugValue())
    ;
  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
    return true;

  // Don't attempt to schedule around any instruction that defines
  // a stack-oriented pointer, as it's unlikely to be profitable. This
  // saves compile time, because it doesn't require every single
  // stack slot reference to depend on the instruction that does the
  // modification.
  if (MI->definesRegister(ARM::SP))
    return true;

  return false;
}

bool ARMBaseInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &MBB,
                    unsigned NumCycles, unsigned ExtraPredCycles,
                    const BranchProbability &Probability) const {
  if (!NumCycles)
    return false;

  // Attempt to estimate the relative costs of predication versus branching.
  unsigned UnpredCost = Probability.getNumerator() * NumCycles;
  UnpredCost /= Probability.getDenominator();
  UnpredCost += 1; // The branch itself
  UnpredCost += Subtarget.getMispredictionPenalty() / 10;

  return (NumCycles + ExtraPredCycles) <= UnpredCost;
}

bool ARMBaseInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &TMBB,
                    unsigned TCycles, unsigned TExtra,
                    MachineBasicBlock &FMBB,
                    unsigned FCycles, unsigned FExtra,
                    const BranchProbability &Probability) const {
  if (!TCycles || !FCycles)
    return false;

  // Attempt to estimate the relative costs of predication versus branching.
  unsigned TUnpredCost = Probability.getNumerator() * TCycles;
  TUnpredCost /= Probability.getDenominator();

  uint32_t Comp = Probability.getDenominator() - Probability.getNumerator();
  unsigned FUnpredCost = Comp * FCycles;
  FUnpredCost /= Probability.getDenominator();

  unsigned UnpredCost = TUnpredCost + FUnpredCost;
  UnpredCost += 1; // The branch itself
  UnpredCost += Subtarget.getMispredictionPenalty() / 10;

  return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
}

/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
ARMCC::CondCodes
llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
  int PIdx = MI->findFirstPredOperandIdx();
  if (PIdx == -1) {
    PredReg = 0;
    return ARMCC::AL;
  }

  PredReg = MI->getOperand(PIdx+1).getReg();
  return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
}


int llvm::getMatchingCondBranchOpcode(int Opc) {
  if (Opc == ARM::B)
    return ARM::Bcc;
  else if (Opc == ARM::tB)
    return ARM::tBcc;
  else if (Opc == ARM::t2B)
    return ARM::t2Bcc;

  llvm_unreachable("Unknown unconditional branch opcode!");
  return 0;
}


void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator &MBBI, DebugLoc dl,
                              unsigned DestReg, unsigned BaseReg, int NumBytes,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              const ARMBaseInstrInfo &TII, unsigned MIFlags) {
  bool isSub = NumBytes < 0;
  if (isSub) NumBytes = -NumBytes;

  while (NumBytes) {
    unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
    unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
    assert(ThisVal && "Didn't extract field correctly");

    // We will handle these bits from offset, clear them.
    NumBytes &= ~ThisVal;

    assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");

    // Build the new ADD / SUB.
    unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
    BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
      .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
      .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
      .setMIFlags(MIFlags);
    BaseReg = DestReg;
  }
}

bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                unsigned FrameReg, int &Offset,
                                const ARMBaseInstrInfo &TII) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();
  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
  bool isSub = false;

  // Memory operands in inline assembly always use AddrMode2.
  if (Opcode == ARM::INLINEASM)
    AddrMode = ARMII::AddrMode2;

  if (Opcode == ARM::ADDri) {
    Offset += MI.getOperand(FrameRegIdx+1).getImm();
    if (Offset == 0) {
      // Turn it into a move.
      MI.setDesc(TII.get(ARM::MOVr));
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
      MI.RemoveOperand(FrameRegIdx+1);
      Offset = 0;
      return true;
    } else if (Offset < 0) {
      Offset = -Offset;
      isSub = true;
      MI.setDesc(TII.get(ARM::SUBri));
    }

    // Common case: small offset, fits into instruction.
    if (ARM_AM::getSOImmVal(Offset) != -1) {
      // Replace the FrameIndex with sp / fp.
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
      MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
      Offset = 0;
      return true;
    }

    // Otherwise, pull as much of the immediate into this ADDri/SUBri
    // as possible.
    unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
    unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);

    // We will handle these bits from offset, clear them.
    Offset &= ~ThisImmVal;

    // Get the properly encoded SOImmVal field.
    assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
           "Bit extraction didn't work?");
    MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
  } else {
    unsigned ImmIdx = 0;
    int InstrOffs = 0;
    unsigned NumBits = 0;
    unsigned Scale = 1;
    switch (AddrMode) {
    case ARMII::AddrMode_i12: {
      ImmIdx = FrameRegIdx + 1;
      InstrOffs = MI.getOperand(ImmIdx).getImm();
      NumBits = 12;
      break;
    }
    case ARMII::AddrMode2: {
      ImmIdx = FrameRegIdx+2;
      InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 12;
      break;
    }
    case ARMII::AddrMode3: {
      ImmIdx = FrameRegIdx+2;
      InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      break;
    }
    case ARMII::AddrMode4:
    case ARMII::AddrMode6:
      // Can't fold any offset even if it's zero.
      return false;
    case ARMII::AddrMode5: {
      ImmIdx = FrameRegIdx+1;
      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      Scale = 4;
      break;
    }
    default:
      llvm_unreachable("Unsupported addressing mode!");
      break;
    }

    Offset += InstrOffs * Scale;
    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
    if (Offset < 0) {
      Offset = -Offset;
      isSub = true;
    }

    // Attempt to fold the address computation if the opcode has offset bits.
    if (NumBits > 0) {
      // Common case: small offset, fits into instruction.
      MachineOperand &ImmOp = MI.getOperand(ImmIdx);
      int ImmedOffset = Offset / Scale;
      unsigned Mask = (1 << NumBits) - 1;
      if ((unsigned)Offset <= Mask * Scale) {
        // Replace the FrameIndex with sp.
        MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
        // FIXME: When addrmode2 goes away, this will simplify (like the
        // T2 version), as the LDR.i12 versions don't need the encoding
        // tricks for the offset value.
        if (isSub) {
          if (AddrMode == ARMII::AddrMode_i12)
            ImmedOffset = -ImmedOffset;
          else
            ImmedOffset |= 1 << NumBits;
        }
        ImmOp.ChangeToImmediate(ImmedOffset);
        Offset = 0;
        return true;
      }

      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
      ImmedOffset = ImmedOffset & Mask;
      if (isSub) {
        if (AddrMode == ARMII::AddrMode_i12)
          ImmedOffset = -ImmedOffset;
        else
          ImmedOffset |= 1 << NumBits;
      }
      ImmOp.ChangeToImmediate(ImmedOffset);
      Offset &= ~(Mask*Scale);
    }
  }

  Offset = (isSub) ? -Offset : Offset;
  return Offset == 0;
}

bool ARMBaseInstrInfo::
AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask,
               int &CmpValue) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::CMPri:
  case ARM::t2CMPri:
    SrcReg = MI->getOperand(0).getReg();
    CmpMask = ~0;
    CmpValue = MI->getOperand(1).getImm();
    return true;
  case ARM::TSTri:
  case ARM::t2TSTri:
    SrcReg = MI->getOperand(0).getReg();
    CmpMask = MI->getOperand(1).getImm();
    CmpValue = 0;
    return true;
  }

  return false;
}

/// isSuitableForMask - Identify a suitable 'and' instruction that
/// operates on the given source register and applies the same mask
/// as a 'tst' instruction. Provide a limited look-through for copies.
/// When successful, MI will hold the found instruction.
static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
                              int CmpMask, bool CommonUse) {
  switch (MI->getOpcode()) {
  case ARM::ANDri:
  case ARM::t2ANDri:
    if (CmpMask != MI->getOperand(2).getImm())
      return false;
    if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
      return true;
    break;
  case ARM::COPY: {
    // Walk down one instruction which is potentially an 'and'.
    const MachineInstr &Copy = *MI;
    MachineBasicBlock::iterator AND(
      llvm::next(MachineBasicBlock::iterator(MI)));
    if (AND == MI->getParent()->end()) return false;
    MI = AND;
    return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
                             CmpMask, true);
  }
  }

  return false;
}

/// OptimizeCompareInstr - Convert the instruction supplying the argument to
/// the comparison into one that sets the zero bit in the flags register.
bool ARMBaseInstrInfo::
OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
                     int CmpValue, const MachineRegisterInfo *MRI) const {
  if (CmpValue != 0)
    return false;

  MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
  if (llvm::next(DI) != MRI->def_end())
    // Only support one definition.
    return false;

  MachineInstr *MI = &*DI;

  // Masked compares sometimes use the same register as the corresponding
  // 'and'.
  if (CmpMask != ~0) {
    if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) {
      MI = 0;
      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
           UE = MRI->use_end(); UI != UE; ++UI) {
        if (UI->getParent() != CmpInstr->getParent()) continue;
        MachineInstr *PotentialAND = &*UI;
        if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true))
          continue;
        MI = PotentialAND;
        break;
      }
      if (!MI) return false;
    }
  }

  // Conservatively refuse to convert an instruction which isn't in the same
  // BB as the comparison.
  if (MI->getParent() != CmpInstr->getParent())
    return false;

  // Check that CPSR isn't set between the comparison instruction and the one
  // we want to change.
  MachineBasicBlock::const_iterator I = CmpInstr, E = MI,
    B = MI->getParent()->begin();

  // Early exit if CmpInstr is at the beginning of the BB.
  if (I == B) return false;

  --I;
  for (; I != E; --I) {
    const MachineInstr &Instr = *I;

    for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
      const MachineOperand &MO = Instr.getOperand(IO);
      if (!MO.isReg()) continue;

      // This instruction modifies or uses CPSR after the one we want to
      // change. We can't do this transformation.
      if (MO.getReg() == ARM::CPSR)
        return false;
    }

    if (I == B)
      // The 'and' is below the comparison instruction.
      return false;
  }

  // Set the "zero" bit in CPSR.
  switch (MI->getOpcode()) {
  default: break;
  case ARM::RSBrr:
  case ARM::RSBri:
  case ARM::RSCrr:
  case ARM::RSCri:
  case ARM::ADDrr:
  case ARM::ADDri:
  case ARM::ADCrr:
  case ARM::ADCri:
  case ARM::SUBrr:
  case ARM::SUBri:
  case ARM::SBCrr:
  case ARM::SBCri:
  case ARM::t2RSBri:
  case ARM::t2ADDrr:
  case ARM::t2ADDri:
  case ARM::t2ADCrr:
  case ARM::t2ADCri:
  case ARM::t2SUBrr:
  case ARM::t2SUBri:
  case ARM::t2SBCrr:
  case ARM::t2SBCri:
  case ARM::ANDrr:
  case ARM::ANDri:
  case ARM::t2ANDrr:
  case ARM::t2ANDri:
  case ARM::ORRrr:
  case ARM::ORRri:
  case ARM::t2ORRrr:
  case ARM::t2ORRri:
  case ARM::EORrr:
  case ARM::EORri:
  case ARM::t2EORrr:
  case ARM::t2EORri: {
    // Scan forward for the use of CPSR. If the use is a condition code that
    // requires checking of the V bit, then this is not safe to do. If we
    // can't find the CPSR use (i.e. it is used in another block), then it's
    // not safe to perform the optimization.
    bool isSafe = false;
    I = CmpInstr;
    E = MI->getParent()->end();
    while (!isSafe && ++I != E) {
      const MachineInstr &Instr = *I;
      for (unsigned IO = 0, EO = Instr.getNumOperands();
           !isSafe && IO != EO; ++IO) {
        const MachineOperand &MO = Instr.getOperand(IO);
        if (!MO.isReg() || MO.getReg() != ARM::CPSR)
          continue;
        if (MO.isDef()) {
          isSafe = true;
          break;
        }
        // Condition code is after the operand before CPSR.
        ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
        switch (CC) {
        default:
          isSafe = true;
          break;
        case ARMCC::VS:
        case ARMCC::VC:
        case ARMCC::GE:
        case ARMCC::LT:
        case ARMCC::GT:
        case ARMCC::LE:
          return false;
        }
      }
    }

    if (!isSafe)
      return false;

    // Toggle the optional operand to CPSR.
    MI->getOperand(5).setReg(ARM::CPSR);
    MI->getOperand(5).setIsDef(true);
    CmpInstr->eraseFromParent();
    return true;
  }
  }

  return false;
}

bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
                                     MachineInstr *DefMI, unsigned Reg,
                                     MachineRegisterInfo *MRI) const {
  // Fold large immediates into add, sub, or, xor.
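  // For example (illustrative): a MOVi32imm / t2MOVi32imm whose 32-bit value
  // splits into two so_imm pieces can be folded into its single ADDrr use,
  // turning it into one ADDri feeding a second ADDri, after which the
  // original move is deleted.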
bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
                                     MachineInstr *DefMI, unsigned Reg,
                                     MachineRegisterInfo *MRI) const {
  // Fold large immediates into add, sub, or, xor.
  unsigned DefOpc = DefMI->getOpcode();
  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
    return false;
  if (!DefMI->getOperand(1).isImm())
    // Could be t2MOVi32imm <ga:xx>
    return false;

  if (!MRI->hasOneNonDBGUse(Reg))
    return false;

  unsigned UseOpc = UseMI->getOpcode();
  unsigned NewUseOpc = 0;
  uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
  bool Commute = false;
  switch (UseOpc) {
  default: return false;
  case ARM::SUBrr:
  case ARM::ADDrr:
  case ARM::ORRrr:
  case ARM::EORrr:
  case ARM::t2SUBrr:
  case ARM::t2ADDrr:
  case ARM::t2ORRrr:
  case ARM::t2EORrr: {
    Commute = UseMI->getOperand(2).getReg() != Reg;
    switch (UseOpc) {
    default: break;
    case ARM::SUBrr: {
      if (Commute)
        return false;
      ImmVal = -ImmVal;
      NewUseOpc = ARM::SUBri;
      // Fallthrough
    }
    case ARM::ADDrr:
    case ARM::ORRrr:
    case ARM::EORrr: {
      if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
      switch (UseOpc) {
      default: break;
      case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
      case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
      case ARM::EORrr: NewUseOpc = ARM::EORri; break;
      }
      break;
    }
    case ARM::t2SUBrr: {
      if (Commute)
        return false;
      ImmVal = -ImmVal;
      NewUseOpc = ARM::t2SUBri;
      // Fallthrough
    }
    case ARM::t2ADDrr:
    case ARM::t2ORRrr:
    case ARM::t2EORrr: {
      if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
      switch (UseOpc) {
      default: break;
      case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
      case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
      case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
      }
      break;
    }
    }
  }
  }

  unsigned OpIdx = Commute ? 2 : 1;
  unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
  bool isKill = UseMI->getOperand(OpIdx).isKill();
  unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
  AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
                                      *UseMI, UseMI->getDebugLoc(),
                                      get(NewUseOpc), NewReg)
                              .addReg(Reg1, getKillRegState(isKill))
                              .addImm(SOImmValV1)));
  UseMI->setDesc(get(NewUseOpc));
  UseMI->getOperand(1).setReg(NewReg);
  UseMI->getOperand(1).setIsKill();
  UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
  DefMI->eraseFromParent();
  return true;
}
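
// A minimal illustrative sketch (standalone; not the ARM_AM implementation
// that FoldImmediate above actually calls): an ARM "so_imm" operand is an
// 8-bit value rotated right by an even amount, and a 32-bit constant can be
// folded above only if it splits into two such pieces. For example,
// 0x00AA00BB is not a single so_imm, but it is 0x00AA0000 + 0x000000BB, so an
// ADDrr fed by a MOVi32imm of 0x00AA00BB can be rewritten as two ADDri
// instructions.
static inline bool isARMSOImmediateSketch(uint32_t V) {
  for (unsigned Rot = 0; Rot != 32; Rot += 2) {
    // Rotating left by Rot undoes a rotate-right-by-Rot encoding.
    uint32_t Undone = Rot == 0 ? V : ((V << Rot) | (V >> (32 - Rot)));
    if (Undone <= 0xFF)
      return true;
  }
  return false;
}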
unsigned
ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
                                 const MachineInstr *MI) const {
  if (!ItinData || ItinData->isEmpty())
    return 1;

  const MCInstrDesc &Desc = MI->getDesc();
  unsigned Class = Desc.getSchedClass();
  unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
  if (UOps)
    return UOps;

  unsigned Opc = MI->getOpcode();
  switch (Opc) {
  default:
    llvm_unreachable("Unexpected multi-uops instruction!");
    break;
  case ARM::VLDMQIA:
  case ARM::VSTMQIA:
    return 2;

  // The number of uOps for load / store multiple is determined by the number
  // of registers.
  //
  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
  // same cycle. The scheduling for the first load / store must be done
  // separately by assuming the address is not 64-bit aligned.
  //
  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the
  // address is not 64-bit aligned, the AGU takes an extra cycle. For VFP /
  // NEON load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD: {
    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
    return (NumRegs / 2) + (NumRegs % 2) + 1;
  }

  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA:
  case ARM::tSTMIA_UPD:
  case ARM::tPOP_RET:
  case ARM::tPOP:
  case ARM::tPUSH:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD: {
    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
    if (Subtarget.isCortexA8()) {
      if (NumRegs < 4)
        return 2;
      // 4 registers would be issued: 2, 2.
      // 5 registers would be issued: 2, 2, 1.
      UOps = (NumRegs / 2);
      if (NumRegs % 2)
        ++UOps;
      return UOps;
    } else if (Subtarget.isCortexA9()) {
      UOps = (NumRegs / 2);
      // If there is an odd number of registers or the address is not 64-bit
      // aligned, it takes an extra AGU (Address Generation Unit) cycle.
      if ((NumRegs % 2) ||
          !MI->hasOneMemOperand() ||
          (*MI->memoperands_begin())->getAlignment() < 8)
        ++UOps;
      return UOps;
    } else {
      // Assume the worst.
      return NumRegs;
    }
  }
  }
}
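
// A minimal illustrative sketch (standalone; it mirrors the formulas used in
// getNumMicroOps above rather than being called by it). For an integer
// LDM/STM of NumRegs registers: NumRegs == 4 costs 2 micro-ops on Cortex-A8
// (issued as 2 + 2) and 2 on Cortex-A9 when the address is 64-bit aligned,
// or 3 when it is not.
static inline unsigned getLdStMultipleUOpsSketch(unsigned NumRegs,
                                                 bool IsCortexA8,
                                                 bool Aligned64Bit) {
  if (IsCortexA8)
    return NumRegs < 4 ? 2 : NumRegs / 2 + NumRegs % 2;
  // Cortex-A9: an odd register count or an unaligned address costs one extra
  // AGU (Address Generation Unit) cycle.
  return NumRegs / 2 + ((NumRegs % 2 || !Aligned64Bit) ? 1 : 0);
}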
int
ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
                                  const MCInstrDesc &DefMCID,
                                  unsigned DefClass,
                                  unsigned DefIdx, unsigned DefAlign) const {
  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
  if (RegNo <= 0)
    // Def is the address writeback.
    return ItinData->getOperandCycle(DefClass, DefIdx);

  int DefCycle;
  if (Subtarget.isCortexA8()) {
    // (regno / 2) + (regno % 2) + 1
    DefCycle = RegNo / 2 + 1;
    if (RegNo % 2)
      ++DefCycle;
  } else if (Subtarget.isCortexA9()) {
    DefCycle = RegNo;
    bool isSLoad = false;

    switch (DefMCID.getOpcode()) {
    default: break;
    case ARM::VLDMSIA:
    case ARM::VLDMSIA_UPD:
    case ARM::VLDMSDB_UPD:
      isSLoad = true;
      break;
    }

    // If there is an odd number of 'S' registers or the address is not 64-bit
    // aligned, it takes an extra cycle.
    if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
      ++DefCycle;
  } else {
    // Assume the worst.
    DefCycle = RegNo + 2;
  }

  return DefCycle;
}

int
ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
                                 const MCInstrDesc &DefMCID,
                                 unsigned DefClass,
                                 unsigned DefIdx, unsigned DefAlign) const {
  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
  if (RegNo <= 0)
    // Def is the address writeback.
    return ItinData->getOperandCycle(DefClass, DefIdx);

  int DefCycle;
  if (Subtarget.isCortexA8()) {
    // 4 registers would be issued: 1, 2, 1.
    // 5 registers would be issued: 1, 2, 2.
    DefCycle = RegNo / 2;
    if (DefCycle < 1)
      DefCycle = 1;
    // Result latency is issue cycle + 2: E2.
    DefCycle += 2;
  } else if (Subtarget.isCortexA9()) {
    DefCycle = (RegNo / 2);
    // If there is an odd number of registers or the address is not 64-bit
    // aligned, it takes an extra AGU (Address Generation Unit) cycle.
    if ((RegNo % 2) || DefAlign < 8)
      ++DefCycle;
    // Result latency is AGU cycles + 2.
    DefCycle += 2;
  } else {
    // Assume the worst.
    DefCycle = RegNo + 2;
  }

  return DefCycle;
}
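
// A minimal illustrative sketch (standalone): the Cortex-A9 result-latency
// formula used by getLDMDefCycle above. RegNo is derived from the operand
// index, so registers later in the load list get larger values and become
// available later. For example, RegNo == 3 with a 64-bit aligned address
// gives 3/2 + 1 (odd) + 2 = 4 cycles.
static inline int getA9LDMDefCycleSketch(int RegNo, unsigned DefAlign) {
  int DefCycle = RegNo / 2;
  // An odd position or an address that is not 64-bit aligned costs an extra
  // AGU (Address Generation Unit) cycle.
  if ((RegNo % 2) || DefAlign < 8)
    ++DefCycle;
  // Result latency is AGU cycles + 2.
  return DefCycle + 2;
}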
int
ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
                                  const MCInstrDesc &UseMCID,
                                  unsigned UseClass,
                                  unsigned UseIdx, unsigned UseAlign) const {
  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
  if (RegNo <= 0)
    return ItinData->getOperandCycle(UseClass, UseIdx);

  int UseCycle;
  if (Subtarget.isCortexA8()) {
    // (regno / 2) + (regno % 2) + 1
    UseCycle = RegNo / 2 + 1;
    if (RegNo % 2)
      ++UseCycle;
  } else if (Subtarget.isCortexA9()) {
    UseCycle = RegNo;
    bool isSStore = false;

    switch (UseMCID.getOpcode()) {
    default: break;
    case ARM::VSTMSIA:
    case ARM::VSTMSIA_UPD:
    case ARM::VSTMSDB_UPD:
      isSStore = true;
      break;
    }

    // If there is an odd number of 'S' registers or the address is not 64-bit
    // aligned, it takes an extra cycle.
    if ((isSStore && (RegNo % 2)) || UseAlign < 8)
      ++UseCycle;
  } else {
    // Assume the worst.
    UseCycle = RegNo + 2;
  }

  return UseCycle;
}

int
ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
                                 const MCInstrDesc &UseMCID,
                                 unsigned UseClass,
                                 unsigned UseIdx, unsigned UseAlign) const {
  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
  if (RegNo <= 0)
    return ItinData->getOperandCycle(UseClass, UseIdx);

  int UseCycle;
  if (Subtarget.isCortexA8()) {
    UseCycle = RegNo / 2;
    if (UseCycle < 2)
      UseCycle = 2;
    // Read in E3.
    UseCycle += 2;
  } else if (Subtarget.isCortexA9()) {
    UseCycle = (RegNo / 2);
    // If there is an odd number of registers or the address is not 64-bit
    // aligned, it takes an extra AGU (Address Generation Unit) cycle.
    if ((RegNo % 2) || UseAlign < 8)
      ++UseCycle;
  } else {
    // Assume the worst.
    UseCycle = 1;
  }
  return UseCycle;
}
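
// A minimal illustrative sketch (standalone): how the def and use cycles
// computed by the helpers above are combined by getOperandLatency() below.
// If the def's result is available in pipeline stage DefCycle and the use
// reads its operand in stage UseCycle, the operand latency is
// DefCycle - UseCycle + 1, reduced by one when the itinerary allows the value
// to be forwarded. For example, DefCycle == 4 and UseCycle == 2 give a
// latency of 3, or 2 with forwarding.
static inline int combineDefUseCyclesSketch(int DefCycle, int UseCycle,
                                            bool HasForwarding) {
  int Latency = DefCycle - UseCycle + 1;
  if (Latency > 0 && HasForwarding)
    --Latency;
  return Latency;
}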
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                    const MCInstrDesc &DefMCID,
                                    unsigned DefIdx, unsigned DefAlign,
                                    const MCInstrDesc &UseMCID,
                                    unsigned UseIdx, unsigned UseAlign) const {
  unsigned DefClass = DefMCID.getSchedClass();
  unsigned UseClass = UseMCID.getSchedClass();

  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
    return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);

  // This may be a def / use of a variable_ops instruction; the operand
  // latency might be determinable dynamically. Let the target try to
  // figure it out.
  int DefCycle = -1;
  bool LdmBypass = false;
  switch (DefMCID.getOpcode()) {
  default:
    DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
    break;

  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
    DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
    break;

  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tPUSH:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
    LdmBypass = 1;
    DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
    break;
  }

  if (DefCycle == -1)
    // We can't seem to determine the result latency of the def; assume it's 2.
    DefCycle = 2;

  int UseCycle = -1;
  switch (UseMCID.getOpcode()) {
  default:
    UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
    break;

  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD:
    UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
    break;

  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tSTMIA:
  case ARM::tSTMIA_UPD:
  case ARM::tPOP_RET:
  case ARM::tPOP:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD:
    UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
    break;
  }

  if (UseCycle == -1)
    // Assume it's read in the first stage.
    UseCycle = 1;

  UseCycle = DefCycle - UseCycle + 1;
  if (UseCycle > 0) {
    if (LdmBypass) {
      // It's a variable_ops instruction so we can't use DefIdx here. Just use
      // the first def operand.
      if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
                                          UseClass, UseIdx))
        --UseCycle;
    } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
                                               UseClass, UseIdx)) {
      --UseCycle;
    }
  }

  return UseCycle;
}
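
// A minimal illustrative sketch (standalone): the "cheap addressing mode"
// test applied by both getOperandLatency overloads below. On Cortex-A8 and
// Cortex-A9, a load whose register offset is unshifted ([r, +/- r]) or
// shifted left by exactly two ([r, r, lsl #2]) is one cycle cheaper than the
// other shifted-register forms.
static inline bool isCheapShifterOpSketch(unsigned ShImm,
                                          ARM_AM::ShiftOpc ShOpc) {
  return ShImm == 0 || (ShImm == 2 && ShOpc == ARM_AM::lsl);
}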
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                    const MachineInstr *DefMI, unsigned DefIdx,
                                    const MachineInstr *UseMI, unsigned UseIdx) const {
  if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
      DefMI->isRegSequence() || DefMI->isImplicitDef())
    return 1;

  const MCInstrDesc &DefMCID = DefMI->getDesc();
  if (!ItinData || ItinData->isEmpty())
    return DefMCID.mayLoad() ? 3 : 1;

  const MCInstrDesc &UseMCID = UseMI->getDesc();
  const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
  if (DefMO.getReg() == ARM::CPSR) {
    if (DefMI->getOpcode() == ARM::FMSTAT) {
      // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
      return Subtarget.isCortexA9() ? 1 : 20;
    }

    // CPSR set and branch can be paired in the same cycle.
    if (UseMCID.isBranch())
      return 0;
  }

  unsigned DefAlign = DefMI->hasOneMemOperand()
    ? (*DefMI->memoperands_begin())->getAlignment() : 0;
  unsigned UseAlign = UseMI->hasOneMemOperand()
    ? (*UseMI->memoperands_begin())->getAlignment() : 0;
  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
                                  UseMCID, UseIdx, UseAlign);

  if (Latency > 1 &&
      (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
    // variants are one cycle cheaper.
    switch (DefMCID.getOpcode()) {
    default: break;
    case ARM::LDRrs:
    case ARM::LDRBrs: {
      unsigned ShOpVal = DefMI->getOperand(3).getImm();
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      if (ShImm == 0 ||
          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
        --Latency;
      break;
    }
    case ARM::t2LDRs:
    case ARM::t2LDRBs:
    case ARM::t2LDRHs:
    case ARM::t2LDRSHs: {
      // Thumb2 mode: lsl only.
      unsigned ShAmt = DefMI->getOperand(3).getImm();
      if (ShAmt == 0 || ShAmt == 2)
        --Latency;
      break;
    }
    }
  }

  if (DefAlign < 8 && Subtarget.isCortexA9())
    switch (DefMCID.getOpcode()) {
    default: break;
    case ARM::VLD1q8:
    case ARM::VLD1q16:
    case ARM::VLD1q32:
    case ARM::VLD1q64:
    case ARM::VLD1q8_UPD:
    case ARM::VLD1q16_UPD:
    case ARM::VLD1q32_UPD:
    case ARM::VLD1q64_UPD:
    case ARM::VLD2d8:
    case ARM::VLD2d16:
    case ARM::VLD2d32:
    case ARM::VLD2q8:
    case ARM::VLD2q16:
    case ARM::VLD2q32:
    case ARM::VLD2d8_UPD:
    case ARM::VLD2d16_UPD:
    case ARM::VLD2d32_UPD:
    case ARM::VLD2q8_UPD:
    case ARM::VLD2q16_UPD:
    case ARM::VLD2q32_UPD:
    case ARM::VLD3d8:
    case ARM::VLD3d16:
    case ARM::VLD3d32:
    case ARM::VLD1d64T:
    case ARM::VLD3d8_UPD:
    case ARM::VLD3d16_UPD:
    case ARM::VLD3d32_UPD:
    case ARM::VLD1d64T_UPD:
    case ARM::VLD3q8_UPD:
    case ARM::VLD3q16_UPD:
    case ARM::VLD3q32_UPD:
    case ARM::VLD4d8:
    case ARM::VLD4d16:
    case ARM::VLD4d32:
    case ARM::VLD1d64Q:
    case ARM::VLD4d8_UPD:
    case ARM::VLD4d16_UPD:
    case ARM::VLD4d32_UPD:
    case ARM::VLD1d64Q_UPD:
    case ARM::VLD4q8_UPD:
    case ARM::VLD4q16_UPD:
    case ARM::VLD4q32_UPD:
    case ARM::VLD1DUPq8:
    case ARM::VLD1DUPq16:
    case ARM::VLD1DUPq32:
    case ARM::VLD1DUPq8_UPD:
    case ARM::VLD1DUPq16_UPD:
    case ARM::VLD1DUPq32_UPD:
    case ARM::VLD2DUPd8:
    case ARM::VLD2DUPd16:
    case ARM::VLD2DUPd32:
    case ARM::VLD2DUPd8_UPD:
    case ARM::VLD2DUPd16_UPD:
    case ARM::VLD2DUPd32_UPD:
    case ARM::VLD4DUPd8:
    case ARM::VLD4DUPd16:
    case ARM::VLD4DUPd32:
    case ARM::VLD4DUPd8_UPD:
    case ARM::VLD4DUPd16_UPD:
    case ARM::VLD4DUPd32_UPD:
    case ARM::VLD1LNd8:
    case ARM::VLD1LNd16:
    case ARM::VLD1LNd32:
    case ARM::VLD1LNd8_UPD:
    case ARM::VLD1LNd16_UPD:
    case ARM::VLD1LNd32_UPD:
    case ARM::VLD2LNd8:
    case ARM::VLD2LNd16:
    case ARM::VLD2LNd32:
    case ARM::VLD2LNq16:
    case ARM::VLD2LNq32:
    case ARM::VLD2LNd8_UPD:
    case ARM::VLD2LNd16_UPD:
    case ARM::VLD2LNd32_UPD:
    case ARM::VLD2LNq16_UPD:
    case ARM::VLD2LNq32_UPD:
    case ARM::VLD4LNd8:
    case ARM::VLD4LNd16:
    case ARM::VLD4LNd32:
    case ARM::VLD4LNq16:
    case ARM::VLD4LNq32:
    case ARM::VLD4LNd8_UPD:
    case ARM::VLD4LNd16_UPD:
    case ARM::VLD4LNd32_UPD:
    case ARM::VLD4LNq16_UPD:
    case ARM::VLD4LNq32_UPD:
      // If the address is not 64-bit aligned, the latencies of these
      // instructions increase by one.
      ++Latency;
      break;
    }

  return Latency;
}
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                    SDNode *DefNode, unsigned DefIdx,
                                    SDNode *UseNode, unsigned UseIdx) const {
  if (!DefNode->isMachineOpcode())
    return 1;

  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());

  if (isZeroCost(DefMCID.Opcode))
    return 0;

  if (!ItinData || ItinData->isEmpty())
    return DefMCID.mayLoad() ? 3 : 1;

  if (!UseNode->isMachineOpcode()) {
    int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
    if (Subtarget.isCortexA9())
      return Latency <= 2 ? 1 : Latency - 1;
    else
      return Latency <= 3 ? 1 : Latency - 2;
  }

  const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
  const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
  unsigned DefAlign = !DefMN->memoperands_empty()
    ? (*DefMN->memoperands_begin())->getAlignment() : 0;
  const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
  unsigned UseAlign = !UseMN->memoperands_empty()
    ? (*UseMN->memoperands_begin())->getAlignment() : 0;
  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
                                  UseMCID, UseIdx, UseAlign);

  if (Latency > 1 &&
      (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
    // variants are one cycle cheaper.
    switch (DefMCID.getOpcode()) {
    default: break;
    case ARM::LDRrs:
    case ARM::LDRBrs: {
      unsigned ShOpVal =
        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      if (ShImm == 0 ||
          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
        --Latency;
      break;
    }
    case ARM::t2LDRs:
    case ARM::t2LDRBs:
    case ARM::t2LDRHs:
    case ARM::t2LDRSHs: {
      // Thumb2 mode: lsl only.
      unsigned ShAmt =
        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
      if (ShAmt == 0 || ShAmt == 2)
        --Latency;
      break;
    }
    }
  }

  if (DefAlign < 8 && Subtarget.isCortexA9())
    switch (DefMCID.getOpcode()) {
    default: break;
    case ARM::VLD1q8Pseudo:
    case ARM::VLD1q16Pseudo:
    case ARM::VLD1q32Pseudo:
    case ARM::VLD1q64Pseudo:
    case ARM::VLD1q8Pseudo_UPD:
    case ARM::VLD1q16Pseudo_UPD:
    case ARM::VLD1q32Pseudo_UPD:
    case ARM::VLD1q64Pseudo_UPD:
    case ARM::VLD2d8Pseudo:
    case ARM::VLD2d16Pseudo:
    case ARM::VLD2d32Pseudo:
    case ARM::VLD2q8Pseudo:
    case ARM::VLD2q16Pseudo:
    case ARM::VLD2q32Pseudo:
    case ARM::VLD2d8Pseudo_UPD:
    case ARM::VLD2d16Pseudo_UPD:
    case ARM::VLD2d32Pseudo_UPD:
    case ARM::VLD2q8Pseudo_UPD:
    case ARM::VLD2q16Pseudo_UPD:
    case ARM::VLD2q32Pseudo_UPD:
    case ARM::VLD3d8Pseudo:
    case ARM::VLD3d16Pseudo:
    case ARM::VLD3d32Pseudo:
    case ARM::VLD1d64TPseudo:
    case ARM::VLD3d8Pseudo_UPD:
    case ARM::VLD3d16Pseudo_UPD:
    case ARM::VLD3d32Pseudo_UPD:
    case ARM::VLD1d64TPseudo_UPD:
    case ARM::VLD3q8Pseudo_UPD:
    case ARM::VLD3q16Pseudo_UPD:
    case ARM::VLD3q32Pseudo_UPD:
    case ARM::VLD3q8oddPseudo:
    case ARM::VLD3q16oddPseudo:
    case ARM::VLD3q32oddPseudo:
    case ARM::VLD3q8oddPseudo_UPD:
    case ARM::VLD3q16oddPseudo_UPD:
    case ARM::VLD3q32oddPseudo_UPD:
    case ARM::VLD4d8Pseudo:
    case ARM::VLD4d16Pseudo:
    case ARM::VLD4d32Pseudo:
    case ARM::VLD1d64QPseudo:
    case ARM::VLD4d8Pseudo_UPD:
    case ARM::VLD4d16Pseudo_UPD:
    case ARM::VLD4d32Pseudo_UPD:
    case ARM::VLD1d64QPseudo_UPD:
    case ARM::VLD4q8Pseudo_UPD:
    case ARM::VLD4q16Pseudo_UPD:
    case ARM::VLD4q32Pseudo_UPD:
    case ARM::VLD4q8oddPseudo:
    case ARM::VLD4q16oddPseudo:
    case ARM::VLD4q32oddPseudo:
    case ARM::VLD4q8oddPseudo_UPD:
    case ARM::VLD4q16oddPseudo_UPD:
    case ARM::VLD4q32oddPseudo_UPD:
    case ARM::VLD1DUPq8Pseudo:
    case ARM::VLD1DUPq16Pseudo:
    case ARM::VLD1DUPq32Pseudo:
    case ARM::VLD1DUPq8Pseudo_UPD:
    case ARM::VLD1DUPq16Pseudo_UPD:
    case ARM::VLD1DUPq32Pseudo_UPD:
    case ARM::VLD2DUPd8Pseudo:
    case ARM::VLD2DUPd16Pseudo:
    case ARM::VLD2DUPd32Pseudo:
    case ARM::VLD2DUPd8Pseudo_UPD:
    case ARM::VLD2DUPd16Pseudo_UPD:
    case ARM::VLD2DUPd32Pseudo_UPD:
    case ARM::VLD4DUPd8Pseudo:
    case ARM::VLD4DUPd16Pseudo:
    case ARM::VLD4DUPd32Pseudo:
    case ARM::VLD4DUPd8Pseudo_UPD:
    case ARM::VLD4DUPd16Pseudo_UPD:
    case ARM::VLD4DUPd32Pseudo_UPD:
    case ARM::VLD1LNq8Pseudo:
    case ARM::VLD1LNq16Pseudo:
    case ARM::VLD1LNq32Pseudo:
    case ARM::VLD1LNq8Pseudo_UPD:
    case ARM::VLD1LNq16Pseudo_UPD:
    case ARM::VLD1LNq32Pseudo_UPD:
    case ARM::VLD2LNd8Pseudo:
    case ARM::VLD2LNd16Pseudo:
    case ARM::VLD2LNd32Pseudo:
    case ARM::VLD2LNq16Pseudo:
    case ARM::VLD2LNq32Pseudo:
    case ARM::VLD2LNd8Pseudo_UPD:
    case ARM::VLD2LNd16Pseudo_UPD:
    case ARM::VLD2LNd32Pseudo_UPD:
    case ARM::VLD2LNq16Pseudo_UPD:
    case ARM::VLD2LNq32Pseudo_UPD:
    case ARM::VLD4LNd8Pseudo:
    case ARM::VLD4LNd16Pseudo:
    case ARM::VLD4LNd32Pseudo:
    case ARM::VLD4LNq16Pseudo:
    case ARM::VLD4LNq32Pseudo:
    case ARM::VLD4LNd8Pseudo_UPD:
    case ARM::VLD4LNd16Pseudo_UPD:
    case ARM::VLD4LNd32Pseudo_UPD:
    case ARM::VLD4LNq16Pseudo_UPD:
    case ARM::VLD4LNq32Pseudo_UPD:
      // If the address is not 64-bit aligned, the latencies of these
      // instructions increase by one.
      ++Latency;
      break;
    }

  return Latency;
}

int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                      const MachineInstr *MI,
                                      unsigned *PredCost) const {
  if (MI->isCopyLike() || MI->isInsertSubreg() ||
      MI->isRegSequence() || MI->isImplicitDef())
    return 1;

  if (!ItinData || ItinData->isEmpty())
    return 1;

  const MCInstrDesc &MCID = MI->getDesc();
  unsigned Class = MCID.getSchedClass();
  unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
  if (PredCost && MCID.hasImplicitDefOfPhysReg(ARM::CPSR))
    // When predicated, CPSR is an additional source operand for CPSR-updating
    // instructions; this apparently increases their latencies.
    *PredCost = 1;
  if (UOps)
    return ItinData->getStageLatency(Class);
  return getNumMicroOps(ItinData, MI);
}

int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                      SDNode *Node) const {
  if (!Node->isMachineOpcode())
    return 1;

  if (!ItinData || ItinData->isEmpty())
    return 1;

  unsigned Opcode = Node->getMachineOpcode();
  switch (Opcode) {
  default:
    return ItinData->getStageLatency(get(Opcode).getSchedClass());
  case ARM::VLDMQIA:
  case ARM::VSTMQIA:
    return 2;
  }
}

bool ARMBaseInstrInfo::
hasHighOperandLatency(const InstrItineraryData *ItinData,
                      const MachineRegisterInfo *MRI,
                      const MachineInstr *DefMI, unsigned DefIdx,
                      const MachineInstr *UseMI, unsigned UseIdx) const {
  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
  unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
  if (Subtarget.isCortexA8() &&
      (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
    // Cortex-A8 VFP instructions are not pipelined.
    return true;

  // Hoist VFP / NEON instructions with a latency of 4 or higher.
  int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
  if (Latency <= 3)
    return false;
  return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
         UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
}

bool ARMBaseInstrInfo::
hasLowDefLatency(const InstrItineraryData *ItinData,
                 const MachineInstr *DefMI, unsigned DefIdx) const {
  if (!ItinData || ItinData->isEmpty())
    return false;

  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
  if (DDomain == ARMII::DomainGeneral) {
    unsigned DefClass = DefMI->getDesc().getSchedClass();
    int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
    return (DefCycle != -1 && DefCycle <= 2);
  }
  return false;
}

bool
ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
                                     unsigned &AddSubOpc,
                                     bool &NegAcc, bool &HasLane) const {
  DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
  if (I == MLxEntryMap.end())
    return false;

  const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
  MulOpc = Entry.MulOpc;
  AddSubOpc = Entry.AddSubOpc;
  NegAcc = Entry.NegAcc;
  HasLane = Entry.HasLane;
  return true;
}