//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the Base ARM implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "ARMBaseInstrInfo.h"
#include "ARM.h"
#include "ARMConstantPoolValue.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/STLExtras.h"

#define GET_INSTRINFO_CTOR
#include "ARMGenInstrInfo.inc"

using namespace llvm;

static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
               cl::desc("Enable ARM 2-addr to 3-addr conv"));

static cl::opt<bool>
WidenVMOVS("widen-vmovs", cl::Hidden,
           cl::desc("Widen ARM vmovs to vmovd when possible"));

/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
  unsigned MLxOpc;     // MLA / MLS opcode
  unsigned MulOpc;     // Expanded multiplication opcode
  unsigned AddSubOpc;  // Expanded add / sub opcode
  bool NegAcc;         // True if the acc is negated before the add / sub.
  bool HasLane;        // True if instruction has an extra "lane" operand.
60 }; 61 62 static const ARM_MLxEntry ARM_MLxTable[] = { 63 // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane 64 // fp scalar ops 65 { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false }, 66 { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false }, 67 { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false }, 68 { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false }, 69 { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false }, 70 { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false }, 71 { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false }, 72 { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false }, 73 74 // fp SIMD ops 75 { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false }, 76 { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false }, 77 { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false }, 78 { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false }, 79 { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true }, 80 { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true }, 81 { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true }, 82 { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true }, 83 }; 84 85 ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) 86 : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), 87 Subtarget(STI) { 88 for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) { 89 if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second) 90 assert(false && "Duplicated entries?"); 91 MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc); 92 MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc); 93 } 94 } 95 96 // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl 97 // currently defaults to no prepass hazard recognizer. 98 ScheduleHazardRecognizer *ARMBaseInstrInfo:: 99 CreateTargetHazardRecognizer(const TargetMachine *TM, 100 const ScheduleDAG *DAG) const { 101 if (usePreRAHazardRecognizer()) { 102 const InstrItineraryData *II = TM->getInstrItineraryData(); 103 return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); 104 } 105 return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG); 106 } 107 108 ScheduleHazardRecognizer *ARMBaseInstrInfo:: 109 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, 110 const ScheduleDAG *DAG) const { 111 if (Subtarget.isThumb2() || Subtarget.hasVFP2()) 112 return (ScheduleHazardRecognizer *) 113 new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG); 114 return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG); 115 } 116 117 MachineInstr * 118 ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, 119 MachineBasicBlock::iterator &MBBI, 120 LiveVariables *LV) const { 121 // FIXME: Thumb2 support. 122 123 if (!EnableARM3Addr) 124 return NULL; 125 126 MachineInstr *MI = MBBI; 127 MachineFunction &MF = *MI->getParent()->getParent(); 128 uint64_t TSFlags = MI->getDesc().TSFlags; 129 bool isPre = false; 130 switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { 131 default: return NULL; 132 case ARMII::IndexModePre: 133 isPre = true; 134 break; 135 case ARMII::IndexModePost: 136 break; 137 } 138 139 // Try splitting an indexed load/store to an un-indexed one plus an add/sub 140 // operation. 
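  // For example, a pre-indexed load such as "ldr r0, [r1, #4]!" can be
  // rewritten as "add r1, r1, #4" followed by "ldr r0, [r1]", while a
  // post-indexed "ldr r0, [r1], #4" becomes "ldr r0, [r1]" followed by the add.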
141 unsigned MemOpc = getUnindexedOpcode(MI->getOpcode()); 142 if (MemOpc == 0) 143 return NULL; 144 145 MachineInstr *UpdateMI = NULL; 146 MachineInstr *MemMI = NULL; 147 unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); 148 const MCInstrDesc &MCID = MI->getDesc(); 149 unsigned NumOps = MCID.getNumOperands(); 150 bool isLoad = !MCID.mayStore(); 151 const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0); 152 const MachineOperand &Base = MI->getOperand(2); 153 const MachineOperand &Offset = MI->getOperand(NumOps-3); 154 unsigned WBReg = WB.getReg(); 155 unsigned BaseReg = Base.getReg(); 156 unsigned OffReg = Offset.getReg(); 157 unsigned OffImm = MI->getOperand(NumOps-2).getImm(); 158 ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm(); 159 switch (AddrMode) { 160 default: 161 assert(false && "Unknown indexed op!"); 162 return NULL; 163 case ARMII::AddrMode2: { 164 bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; 165 unsigned Amt = ARM_AM::getAM2Offset(OffImm); 166 if (OffReg == 0) { 167 if (ARM_AM::getSOImmVal(Amt) == -1) 168 // Can't encode it in a so_imm operand. This transformation will 169 // add more than 1 instruction. Abandon! 170 return NULL; 171 UpdateMI = BuildMI(MF, MI->getDebugLoc(), 172 get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) 173 .addReg(BaseReg).addImm(Amt) 174 .addImm(Pred).addReg(0).addReg(0); 175 } else if (Amt != 0) { 176 ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm); 177 unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt); 178 UpdateMI = BuildMI(MF, MI->getDebugLoc(), 179 get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg) 180 .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc) 181 .addImm(Pred).addReg(0).addReg(0); 182 } else 183 UpdateMI = BuildMI(MF, MI->getDebugLoc(), 184 get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) 185 .addReg(BaseReg).addReg(OffReg) 186 .addImm(Pred).addReg(0).addReg(0); 187 break; 188 } 189 case ARMII::AddrMode3 : { 190 bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub; 191 unsigned Amt = ARM_AM::getAM3Offset(OffImm); 192 if (OffReg == 0) 193 // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand. 194 UpdateMI = BuildMI(MF, MI->getDebugLoc(), 195 get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) 196 .addReg(BaseReg).addImm(Amt) 197 .addImm(Pred).addReg(0).addReg(0); 198 else 199 UpdateMI = BuildMI(MF, MI->getDebugLoc(), 200 get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) 201 .addReg(BaseReg).addReg(OffReg) 202 .addImm(Pred).addReg(0).addReg(0); 203 break; 204 } 205 } 206 207 std::vector<MachineInstr*> NewMIs; 208 if (isPre) { 209 if (isLoad) 210 MemMI = BuildMI(MF, MI->getDebugLoc(), 211 get(MemOpc), MI->getOperand(0).getReg()) 212 .addReg(WBReg).addImm(0).addImm(Pred); 213 else 214 MemMI = BuildMI(MF, MI->getDebugLoc(), 215 get(MemOpc)).addReg(MI->getOperand(1).getReg()) 216 .addReg(WBReg).addReg(0).addImm(0).addImm(Pred); 217 NewMIs.push_back(MemMI); 218 NewMIs.push_back(UpdateMI); 219 } else { 220 if (isLoad) 221 MemMI = BuildMI(MF, MI->getDebugLoc(), 222 get(MemOpc), MI->getOperand(0).getReg()) 223 .addReg(BaseReg).addImm(0).addImm(Pred); 224 else 225 MemMI = BuildMI(MF, MI->getDebugLoc(), 226 get(MemOpc)).addReg(MI->getOperand(1).getReg()) 227 .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred); 228 if (WB.isDead()) 229 UpdateMI->getOperand(0).setIsDead(); 230 NewMIs.push_back(UpdateMI); 231 NewMIs.push_back(MemMI); 232 } 233 234 // Transfer LiveVariables states, kill / dead info. 
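  // The write-back register is now defined by UpdateMI while the transferred
  // value belongs to MemMI, so kill / dead flags have to be re-homed onto
  // whichever of the two new instructions actually reads or defines them.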
  if (LV) {
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
        unsigned Reg = MO.getReg();

        LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
        if (MO.isDef()) {
          MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
          if (MO.isDead())
            LV->addVirtualRegisterDead(Reg, NewMI);
        }
        if (MO.isUse() && MO.isKill()) {
          for (unsigned j = 0; j < 2; ++j) {
            // Look at the two new MI's in reverse order.
            MachineInstr *NewMI = NewMIs[j];
            if (!NewMI->readsRegister(Reg))
              continue;
            LV->addVirtualRegisterKilled(Reg, NewMI);
            if (VI.removeKill(MI))
              VI.Kills.push_back(NewMI);
            break;
          }
        }
      }
    }
  }

  MFI->insert(MBBI, NewMIs[1]);
  MFI->insert(MBBI, NewMIs[0]);
  return NewMIs[0];
}

// Branch analysis.
bool
ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      TBB = LastInst->getOperand(0).getMBB();
      Cond.push_back(LastInst->getOperand(1));
      Cond.push_back(LastInst->getOperand(2));
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    Cond.push_back(SecondLastInst->getOperand(1));
    Cond.push_back(SecondLastInst->getOperand(2));
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it.
The second 343 // one is not executed, so remove it. 344 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 345 TBB = SecondLastInst->getOperand(0).getMBB(); 346 I = LastInst; 347 if (AllowModify) 348 I->eraseFromParent(); 349 return false; 350 } 351 352 // ...likewise if it ends with a branch table followed by an unconditional 353 // branch. The branch folder can create these, and we must get rid of them for 354 // correctness of Thumb constant islands. 355 if ((isJumpTableBranchOpcode(SecondLastOpc) || 356 isIndirectBranchOpcode(SecondLastOpc)) && 357 isUncondBranchOpcode(LastOpc)) { 358 I = LastInst; 359 if (AllowModify) 360 I->eraseFromParent(); 361 return true; 362 } 363 364 // Otherwise, can't handle this. 365 return true; 366 } 367 368 369 unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { 370 MachineBasicBlock::iterator I = MBB.end(); 371 if (I == MBB.begin()) return 0; 372 --I; 373 while (I->isDebugValue()) { 374 if (I == MBB.begin()) 375 return 0; 376 --I; 377 } 378 if (!isUncondBranchOpcode(I->getOpcode()) && 379 !isCondBranchOpcode(I->getOpcode())) 380 return 0; 381 382 // Remove the branch. 383 I->eraseFromParent(); 384 385 I = MBB.end(); 386 387 if (I == MBB.begin()) return 1; 388 --I; 389 if (!isCondBranchOpcode(I->getOpcode())) 390 return 1; 391 392 // Remove the branch. 393 I->eraseFromParent(); 394 return 2; 395 } 396 397 unsigned 398 ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, 399 MachineBasicBlock *FBB, 400 const SmallVectorImpl<MachineOperand> &Cond, 401 DebugLoc DL) const { 402 ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>(); 403 int BOpc = !AFI->isThumbFunction() 404 ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB); 405 int BccOpc = !AFI->isThumbFunction() 406 ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc); 407 bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function(); 408 409 // Shouldn't be a fall through. 410 assert(TBB && "InsertBranch must not be told to insert a fallthrough"); 411 assert((Cond.size() == 2 || Cond.size() == 0) && 412 "ARM branch conditions have two components!"); 413 414 if (FBB == 0) { 415 if (Cond.empty()) { // Unconditional branch? 416 if (isThumb) 417 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0); 418 else 419 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); 420 } else 421 BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) 422 .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); 423 return 1; 424 } 425 426 // Two-way conditional branch. 
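  // Emit "Bcc TBB" followed by an unconditional "B FBB", e.g.
  //   beq  .LBB0_2
  //   b    .LBB0_3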
427 BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) 428 .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); 429 if (isThumb) 430 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0); 431 else 432 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); 433 return 2; 434 } 435 436 bool ARMBaseInstrInfo:: 437 ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { 438 ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); 439 Cond[0].setImm(ARMCC::getOppositeCondition(CC)); 440 return false; 441 } 442 443 bool ARMBaseInstrInfo:: 444 PredicateInstruction(MachineInstr *MI, 445 const SmallVectorImpl<MachineOperand> &Pred) const { 446 unsigned Opc = MI->getOpcode(); 447 if (isUncondBranchOpcode(Opc)) { 448 MI->setDesc(get(getMatchingCondBranchOpcode(Opc))); 449 MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm())); 450 MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false)); 451 return true; 452 } 453 454 int PIdx = MI->findFirstPredOperandIdx(); 455 if (PIdx != -1) { 456 MachineOperand &PMO = MI->getOperand(PIdx); 457 PMO.setImm(Pred[0].getImm()); 458 MI->getOperand(PIdx+1).setReg(Pred[1].getReg()); 459 return true; 460 } 461 return false; 462 } 463 464 bool ARMBaseInstrInfo:: 465 SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, 466 const SmallVectorImpl<MachineOperand> &Pred2) const { 467 if (Pred1.size() > 2 || Pred2.size() > 2) 468 return false; 469 470 ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); 471 ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); 472 if (CC1 == CC2) 473 return true; 474 475 switch (CC1) { 476 default: 477 return false; 478 case ARMCC::AL: 479 return true; 480 case ARMCC::HS: 481 return CC2 == ARMCC::HI; 482 case ARMCC::LS: 483 return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; 484 case ARMCC::GE: 485 return CC2 == ARMCC::GT; 486 case ARMCC::LE: 487 return CC2 == ARMCC::LT; 488 } 489 } 490 491 bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, 492 std::vector<MachineOperand> &Pred) const { 493 // FIXME: This confuses implicit_def with optional CPSR def. 494 const MCInstrDesc &MCID = MI->getDesc(); 495 if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef()) 496 return false; 497 498 bool Found = false; 499 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 500 const MachineOperand &MO = MI->getOperand(i); 501 if (MO.isReg() && MO.getReg() == ARM::CPSR) { 502 Pred.push_back(MO); 503 Found = true; 504 } 505 } 506 507 return Found; 508 } 509 510 /// isPredicable - Return true if the specified instruction can be predicated. 511 /// By default, this returns true for every instruction with a 512 /// PredicateOperand. 513 bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { 514 const MCInstrDesc &MCID = MI->getDesc(); 515 if (!MCID.isPredicable()) 516 return false; 517 518 if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { 519 ARMFunctionInfo *AFI = 520 MI->getParent()->getParent()->getInfo<ARMFunctionInfo>(); 521 return AFI->isThumb2Function(); 522 } 523 return true; 524 } 525 526 /// FIXME: Works around a gcc miscompilation with -fstrict-aliasing. 527 LLVM_ATTRIBUTE_NOINLINE 528 static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, 529 unsigned JTI); 530 static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, 531 unsigned JTI) { 532 assert(JTI < JT.size()); 533 return JT[JTI].MBBs.size(); 534 } 535 536 /// GetInstSize - Return the size of the specified MachineInstr. 
///
unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  const MachineBasicBlock &MBB = *MI->getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  const MCInstrDesc &MCID = MI->getDesc();
  if (MCID.getSize())
    return MCID.getSize();

  // If this machine instr is an inline asm, measure it.
  if (MI->getOpcode() == ARM::INLINEASM)
    return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
  if (MI->isLabel())
    return 0;
  unsigned Opc = MI->getOpcode();
  switch (Opc) {
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::PROLOG_LABEL:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::DBG_VALUE:
    return 0;
  case ARM::MOVi16_ga_pcrel:
  case ARM::MOVTi16_ga_pcrel:
  case ARM::t2MOVi16_ga_pcrel:
  case ARM::t2MOVTi16_ga_pcrel:
    return 4;
  case ARM::MOVi32imm:
  case ARM::t2MOVi32imm:
    return 8;
  case ARM::CONSTPOOL_ENTRY:
    // If this machine instr is a constant pool entry, its size is recorded as
    // operand #2.
    return MI->getOperand(2).getImm();
  case ARM::Int_eh_sjlj_longjmp:
    return 16;
  case ARM::tInt_eh_sjlj_longjmp:
    return 10;
  case ARM::Int_eh_sjlj_setjmp:
  case ARM::Int_eh_sjlj_setjmp_nofp:
    return 20;
  case ARM::tInt_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp_nofp:
    return 12;
  case ARM::BR_JTr:
  case ARM::BR_JTm:
  case ARM::BR_JTadd:
  case ARM::tBR_JTr:
  case ARM::t2BR_JT:
  case ARM::t2TBB_JT:
  case ARM::t2TBH_JT: {
    // These are jumptable branches, i.e. a branch followed by an inlined
    // jumptable. The size is 4 + 4 * number of entries. For TBB, each
    // entry is one byte; for TBH each entry is two bytes.
    unsigned EntrySize = (Opc == ARM::t2TBB_JT)
      ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
    unsigned NumOps = MCID.getNumOperands();
    MachineOperand JTOP =
      MI->getOperand(NumOps - (MCID.isPredicable() ? 3 : 2));
    unsigned JTI = JTOP.getIndex();
    const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
    assert(MJTI != 0);
    const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
    assert(JTI < JT.size());
    // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
    // aligned. The assembler / linker may add 2 byte padding just before
    // the JT entries. The size does not include this padding; the
    // constant islands pass does separate bookkeeping for it.
    // FIXME: If we know the size of the function is less than (1 << 16) * 2
    // bytes, we can use 16-bit entries instead. Then there won't be an
    // alignment issue.
    unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
    unsigned NumEntries = getNumJTEntries(JT, JTI);
    if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
      // Make sure the instruction that follows TBB is 2-byte aligned.
      // FIXME: Constant island pass should insert an "ALIGN" instruction
      // instead.
      ++NumEntries;
    return NumEntries * EntrySize + InstSize;
  }
  default:
    // Otherwise, pseudo-instruction sizes are zero.
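    // (Pseudos such as the ADJCALLSTACKDOWN / ADJCALLSTACKUP markers are
    // removed before emission and therefore occupy no bytes.)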
621 return 0; 622 } 623 return 0; // Not reached 624 } 625 626 void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, 627 MachineBasicBlock::iterator I, DebugLoc DL, 628 unsigned DestReg, unsigned SrcReg, 629 bool KillSrc) const { 630 bool GPRDest = ARM::GPRRegClass.contains(DestReg); 631 bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); 632 633 if (GPRDest && GPRSrc) { 634 AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) 635 .addReg(SrcReg, getKillRegState(KillSrc)))); 636 return; 637 } 638 639 bool SPRDest = ARM::SPRRegClass.contains(DestReg); 640 bool SPRSrc = ARM::SPRRegClass.contains(SrcReg); 641 642 unsigned Opc = 0; 643 if (SPRDest && SPRSrc) 644 Opc = ARM::VMOVS; 645 else if (GPRDest && SPRSrc) 646 Opc = ARM::VMOVRS; 647 else if (SPRDest && GPRSrc) 648 Opc = ARM::VMOVSR; 649 else if (ARM::DPRRegClass.contains(DestReg, SrcReg)) 650 Opc = ARM::VMOVD; 651 else if (ARM::QPRRegClass.contains(DestReg, SrcReg)) 652 Opc = ARM::VORRq; 653 654 if (Opc) { 655 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg); 656 MIB.addReg(SrcReg, getKillRegState(KillSrc)); 657 if (Opc == ARM::VORRq) 658 MIB.addReg(SrcReg, getKillRegState(KillSrc)); 659 AddDefaultPred(MIB); 660 return; 661 } 662 663 // Generate instructions for VMOVQQ and VMOVQQQQ pseudos in place. 664 if (ARM::QQPRRegClass.contains(DestReg, SrcReg) || 665 ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) { 666 const TargetRegisterInfo *TRI = &getRegisterInfo(); 667 assert(ARM::qsub_0 + 3 == ARM::qsub_3 && "Expected contiguous enum."); 668 unsigned EndSubReg = ARM::QQPRRegClass.contains(DestReg, SrcReg) ? 669 ARM::qsub_1 : ARM::qsub_3; 670 for (unsigned i = ARM::qsub_0, e = EndSubReg + 1; i != e; ++i) { 671 unsigned Dst = TRI->getSubReg(DestReg, i); 672 unsigned Src = TRI->getSubReg(SrcReg, i); 673 MachineInstrBuilder Mov = 674 AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VORRq)) 675 .addReg(Dst, RegState::Define) 676 .addReg(Src, getKillRegState(KillSrc)) 677 .addReg(Src, getKillRegState(KillSrc))); 678 if (i == EndSubReg) { 679 Mov->addRegisterDefined(DestReg, TRI); 680 if (KillSrc) 681 Mov->addRegisterKilled(SrcReg, TRI); 682 } 683 } 684 return; 685 } 686 llvm_unreachable("Impossible reg-to-reg copy"); 687 } 688 689 static const 690 MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, 691 unsigned Reg, unsigned SubIdx, unsigned State, 692 const TargetRegisterInfo *TRI) { 693 if (!SubIdx) 694 return MIB.addReg(Reg, State); 695 696 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 697 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); 698 return MIB.addReg(Reg, State, SubIdx); 699 } 700 701 void ARMBaseInstrInfo:: 702 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 703 unsigned SrcReg, bool isKill, int FI, 704 const TargetRegisterClass *RC, 705 const TargetRegisterInfo *TRI) const { 706 DebugLoc DL; 707 if (I != MBB.end()) DL = I->getDebugLoc(); 708 MachineFunction &MF = *MBB.getParent(); 709 MachineFrameInfo &MFI = *MF.getFrameInfo(); 710 unsigned Align = MFI.getObjectAlignment(FI); 711 712 MachineMemOperand *MMO = 713 MF.getMachineMemOperand(MachinePointerInfo( 714 PseudoSourceValue::getFixedStack(FI)), 715 MachineMemOperand::MOStore, 716 MFI.getObjectSize(FI), 717 Align); 718 719 switch (RC->getSize()) { 720 case 4: 721 if (ARM::GPRRegClass.hasSubClassEq(RC)) { 722 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12)) 723 .addReg(SrcReg, getKillRegState(isKill)) 724 .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); 725 } else if 
(ARM::SPRRegClass.hasSubClassEq(RC)) { 726 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS)) 727 .addReg(SrcReg, getKillRegState(isKill)) 728 .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); 729 } else 730 llvm_unreachable("Unknown reg class!"); 731 break; 732 case 8: 733 if (ARM::DPRRegClass.hasSubClassEq(RC)) { 734 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) 735 .addReg(SrcReg, getKillRegState(isKill)) 736 .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); 737 } else 738 llvm_unreachable("Unknown reg class!"); 739 break; 740 case 16: 741 if (ARM::QPRRegClass.hasSubClassEq(RC)) { 742 if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) { 743 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo)) 744 .addFrameIndex(FI).addImm(16) 745 .addReg(SrcReg, getKillRegState(isKill)) 746 .addMemOperand(MMO)); 747 } else { 748 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA)) 749 .addReg(SrcReg, getKillRegState(isKill)) 750 .addFrameIndex(FI) 751 .addMemOperand(MMO)); 752 } 753 } else 754 llvm_unreachable("Unknown reg class!"); 755 break; 756 case 32: 757 if (ARM::QQPRRegClass.hasSubClassEq(RC)) { 758 if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { 759 // FIXME: It's possible to only store part of the QQ register if the 760 // spilled def has a sub-register index. 761 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo)) 762 .addFrameIndex(FI).addImm(16) 763 .addReg(SrcReg, getKillRegState(isKill)) 764 .addMemOperand(MMO)); 765 } else { 766 MachineInstrBuilder MIB = 767 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) 768 .addFrameIndex(FI)) 769 .addMemOperand(MMO); 770 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); 771 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); 772 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); 773 AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); 774 } 775 } else 776 llvm_unreachable("Unknown reg class!"); 777 break; 778 case 64: 779 if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { 780 MachineInstrBuilder MIB = 781 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) 782 .addFrameIndex(FI)) 783 .addMemOperand(MMO); 784 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); 785 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); 786 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); 787 MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); 788 MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI); 789 MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI); 790 MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI); 791 AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI); 792 } else 793 llvm_unreachable("Unknown reg class!"); 794 break; 795 default: 796 llvm_unreachable("Unknown reg class!"); 797 } 798 } 799 800 unsigned 801 ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, 802 int &FrameIndex) const { 803 switch (MI->getOpcode()) { 804 default: break; 805 case ARM::STRrs: 806 case ARM::t2STRs: // FIXME: don't use t2STRs to access frame. 
807 if (MI->getOperand(1).isFI() && 808 MI->getOperand(2).isReg() && 809 MI->getOperand(3).isImm() && 810 MI->getOperand(2).getReg() == 0 && 811 MI->getOperand(3).getImm() == 0) { 812 FrameIndex = MI->getOperand(1).getIndex(); 813 return MI->getOperand(0).getReg(); 814 } 815 break; 816 case ARM::STRi12: 817 case ARM::t2STRi12: 818 case ARM::tSTRspi: 819 case ARM::VSTRD: 820 case ARM::VSTRS: 821 if (MI->getOperand(1).isFI() && 822 MI->getOperand(2).isImm() && 823 MI->getOperand(2).getImm() == 0) { 824 FrameIndex = MI->getOperand(1).getIndex(); 825 return MI->getOperand(0).getReg(); 826 } 827 break; 828 case ARM::VST1q64Pseudo: 829 if (MI->getOperand(0).isFI() && 830 MI->getOperand(2).getSubReg() == 0) { 831 FrameIndex = MI->getOperand(0).getIndex(); 832 return MI->getOperand(2).getReg(); 833 } 834 break; 835 case ARM::VSTMQIA: 836 if (MI->getOperand(1).isFI() && 837 MI->getOperand(0).getSubReg() == 0) { 838 FrameIndex = MI->getOperand(1).getIndex(); 839 return MI->getOperand(0).getReg(); 840 } 841 break; 842 } 843 844 return 0; 845 } 846 847 unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, 848 int &FrameIndex) const { 849 const MachineMemOperand *Dummy; 850 return MI->getDesc().mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex); 851 } 852 853 void ARMBaseInstrInfo:: 854 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 855 unsigned DestReg, int FI, 856 const TargetRegisterClass *RC, 857 const TargetRegisterInfo *TRI) const { 858 DebugLoc DL; 859 if (I != MBB.end()) DL = I->getDebugLoc(); 860 MachineFunction &MF = *MBB.getParent(); 861 MachineFrameInfo &MFI = *MF.getFrameInfo(); 862 unsigned Align = MFI.getObjectAlignment(FI); 863 MachineMemOperand *MMO = 864 MF.getMachineMemOperand( 865 MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), 866 MachineMemOperand::MOLoad, 867 MFI.getObjectSize(FI), 868 Align); 869 870 switch (RC->getSize()) { 871 case 4: 872 if (ARM::GPRRegClass.hasSubClassEq(RC)) { 873 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg) 874 .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); 875 876 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { 877 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) 878 .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); 879 } else 880 llvm_unreachable("Unknown reg class!"); 881 break; 882 case 8: 883 if (ARM::DPRRegClass.hasSubClassEq(RC)) { 884 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) 885 .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); 886 } else 887 llvm_unreachable("Unknown reg class!"); 888 break; 889 case 16: 890 if (ARM::QPRRegClass.hasSubClassEq(RC)) { 891 if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) { 892 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg) 893 .addFrameIndex(FI).addImm(16) 894 .addMemOperand(MMO)); 895 } else { 896 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg) 897 .addFrameIndex(FI) 898 .addMemOperand(MMO)); 899 } 900 } else 901 llvm_unreachable("Unknown reg class!"); 902 break; 903 case 32: 904 if (ARM::QQPRRegClass.hasSubClassEq(RC)) { 905 if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { 906 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) 907 .addFrameIndex(FI).addImm(16) 908 .addMemOperand(MMO)); 909 } else { 910 MachineInstrBuilder MIB = 911 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) 912 .addFrameIndex(FI)) 913 .addMemOperand(MMO); 914 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); 915 MIB = 
AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); 916 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); 917 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); 918 MIB.addReg(DestReg, RegState::Define | RegState::Implicit); 919 } 920 } else 921 llvm_unreachable("Unknown reg class!"); 922 break; 923 case 64: 924 if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { 925 MachineInstrBuilder MIB = 926 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) 927 .addFrameIndex(FI)) 928 .addMemOperand(MMO); 929 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); 930 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); 931 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); 932 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); 933 MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI); 934 MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI); 935 MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI); 936 MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI); 937 MIB.addReg(DestReg, RegState::Define | RegState::Implicit); 938 } else 939 llvm_unreachable("Unknown reg class!"); 940 break; 941 default: 942 llvm_unreachable("Unknown regclass!"); 943 } 944 } 945 946 unsigned 947 ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, 948 int &FrameIndex) const { 949 switch (MI->getOpcode()) { 950 default: break; 951 case ARM::LDRrs: 952 case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame. 953 if (MI->getOperand(1).isFI() && 954 MI->getOperand(2).isReg() && 955 MI->getOperand(3).isImm() && 956 MI->getOperand(2).getReg() == 0 && 957 MI->getOperand(3).getImm() == 0) { 958 FrameIndex = MI->getOperand(1).getIndex(); 959 return MI->getOperand(0).getReg(); 960 } 961 break; 962 case ARM::LDRi12: 963 case ARM::t2LDRi12: 964 case ARM::tLDRspi: 965 case ARM::VLDRD: 966 case ARM::VLDRS: 967 if (MI->getOperand(1).isFI() && 968 MI->getOperand(2).isImm() && 969 MI->getOperand(2).getImm() == 0) { 970 FrameIndex = MI->getOperand(1).getIndex(); 971 return MI->getOperand(0).getReg(); 972 } 973 break; 974 case ARM::VLD1q64Pseudo: 975 if (MI->getOperand(1).isFI() && 976 MI->getOperand(0).getSubReg() == 0) { 977 FrameIndex = MI->getOperand(1).getIndex(); 978 return MI->getOperand(0).getReg(); 979 } 980 break; 981 case ARM::VLDMQIA: 982 if (MI->getOperand(1).isFI() && 983 MI->getOperand(0).getSubReg() == 0) { 984 FrameIndex = MI->getOperand(1).getIndex(); 985 return MI->getOperand(0).getReg(); 986 } 987 break; 988 } 989 990 return 0; 991 } 992 993 unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, 994 int &FrameIndex) const { 995 const MachineMemOperand *Dummy; 996 return MI->getDesc().mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); 997 } 998 999 bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ 1000 // This hook gets to expand COPY instructions before they become 1001 // copyPhysReg() calls. Look for VMOVS instructions that can legally be 1002 // widened to VMOVD. We prefer the VMOVD when possible because it may be 1003 // changed into a VORR that can go down the NEON pipeline. 1004 if (!WidenVMOVS || !MI->isCopy()) 1005 return false; 1006 1007 // Look for a copy between even S-registers. That is where we keep floats 1008 // when using NEON v2f32 instructions for f32 arithmetic. 
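  // Only the even S-registers (S0, S2, ...) are the low half (ssub_0) of a
  // D-register, so only copies between those can be widened to a full VMOVD.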
1009 unsigned DstRegS = MI->getOperand(0).getReg(); 1010 unsigned SrcRegS = MI->getOperand(1).getReg(); 1011 if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS)) 1012 return false; 1013 1014 const TargetRegisterInfo *TRI = &getRegisterInfo(); 1015 unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0, 1016 &ARM::DPRRegClass); 1017 unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0, 1018 &ARM::DPRRegClass); 1019 if (!DstRegD || !SrcRegD) 1020 return false; 1021 1022 // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only 1023 // legal if the COPY already defines the full DstRegD, and it isn't a 1024 // sub-register insertion. 1025 if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI)) 1026 return false; 1027 1028 // A dead copy shouldn't show up here, but reject it just in case. 1029 if (MI->getOperand(0).isDead()) 1030 return false; 1031 1032 // All clear, widen the COPY. 1033 DEBUG(dbgs() << "widening: " << *MI); 1034 1035 // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg 1036 // or some other super-register. 1037 int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD); 1038 if (ImpDefIdx != -1) 1039 MI->RemoveOperand(ImpDefIdx); 1040 1041 // Change the opcode and operands. 1042 MI->setDesc(get(ARM::VMOVD)); 1043 MI->getOperand(0).setReg(DstRegD); 1044 MI->getOperand(1).setReg(SrcRegD); 1045 AddDefaultPred(MachineInstrBuilder(MI)); 1046 1047 // We are now reading SrcRegD instead of SrcRegS. This may upset the 1048 // register scavenger and machine verifier, so we need to indicate that we 1049 // are reading an undefined value from SrcRegD, but a proper value from 1050 // SrcRegS. 1051 MI->getOperand(1).setIsUndef(); 1052 MachineInstrBuilder(MI).addReg(SrcRegS, RegState::Implicit); 1053 1054 // SrcRegD may actually contain an unrelated value in the ssub_1 1055 // sub-register. Don't kill it. Only kill the ssub_0 sub-register. 1056 if (MI->getOperand(1).isKill()) { 1057 MI->getOperand(1).setIsKill(false); 1058 MI->addRegisterKilled(SrcRegS, TRI, true); 1059 } 1060 1061 DEBUG(dbgs() << "replaced by: " << *MI); 1062 return true; 1063 } 1064 1065 MachineInstr* 1066 ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, 1067 int FrameIx, uint64_t Offset, 1068 const MDNode *MDPtr, 1069 DebugLoc DL) const { 1070 MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE)) 1071 .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); 1072 return &*MIB; 1073 } 1074 1075 /// Create a copy of a const pool value. Update CPI to the new index and return 1076 /// the label UID. 1077 static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { 1078 MachineConstantPool *MCP = MF.getConstantPool(); 1079 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1080 1081 const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; 1082 assert(MCPE.isMachineConstantPoolEntry() && 1083 "Expecting a machine constantpool entry!"); 1084 ARMConstantPoolValue *ACPV = 1085 static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); 1086 1087 unsigned PCLabelId = AFI->createPICLabelUId(); 1088 ARMConstantPoolValue *NewCPV = 0; 1089 // FIXME: The below assumes PIC relocation model and that the function 1090 // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and 1091 // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR 1092 // instructions, so that's probably OK, but is PIC always correct when 1093 // we get here? 
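  // (The "4" passed below is that Thumb PCAdjustment; the label and adjustment
  // are later resolved against the PC of the referencing load.)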
1094 if (ACPV->isGlobalValue()) 1095 NewCPV = ARMConstantPoolConstant:: 1096 Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, 1097 ARMCP::CPValue, 4); 1098 else if (ACPV->isExtSymbol()) 1099 NewCPV = ARMConstantPoolSymbol:: 1100 Create(MF.getFunction()->getContext(), 1101 cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4); 1102 else if (ACPV->isBlockAddress()) 1103 NewCPV = ARMConstantPoolConstant:: 1104 Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId, 1105 ARMCP::CPBlockAddress, 4); 1106 else if (ACPV->isLSDA()) 1107 NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId, 1108 ARMCP::CPLSDA, 4); 1109 else if (ACPV->isMachineBasicBlock()) 1110 NewCPV = ARMConstantPoolMBB:: 1111 Create(MF.getFunction()->getContext(), 1112 cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4); 1113 else 1114 llvm_unreachable("Unexpected ARM constantpool value type!!"); 1115 CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment()); 1116 return PCLabelId; 1117 } 1118 1119 void ARMBaseInstrInfo:: 1120 reMaterialize(MachineBasicBlock &MBB, 1121 MachineBasicBlock::iterator I, 1122 unsigned DestReg, unsigned SubIdx, 1123 const MachineInstr *Orig, 1124 const TargetRegisterInfo &TRI) const { 1125 unsigned Opcode = Orig->getOpcode(); 1126 switch (Opcode) { 1127 default: { 1128 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); 1129 MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI); 1130 MBB.insert(I, MI); 1131 break; 1132 } 1133 case ARM::tLDRpci_pic: 1134 case ARM::t2LDRpci_pic: { 1135 MachineFunction &MF = *MBB.getParent(); 1136 unsigned CPI = Orig->getOperand(1).getIndex(); 1137 unsigned PCLabelId = duplicateCPV(MF, CPI); 1138 MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode), 1139 DestReg) 1140 .addConstantPoolIndex(CPI).addImm(PCLabelId); 1141 MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end()); 1142 break; 1143 } 1144 } 1145 } 1146 1147 MachineInstr * 1148 ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const { 1149 MachineInstr *MI = TargetInstrInfoImpl::duplicate(Orig, MF); 1150 switch(Orig->getOpcode()) { 1151 case ARM::tLDRpci_pic: 1152 case ARM::t2LDRpci_pic: { 1153 unsigned CPI = Orig->getOperand(1).getIndex(); 1154 unsigned PCLabelId = duplicateCPV(MF, CPI); 1155 Orig->getOperand(1).setIndex(CPI); 1156 Orig->getOperand(2).setImm(PCLabelId); 1157 break; 1158 } 1159 } 1160 return MI; 1161 } 1162 1163 bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, 1164 const MachineInstr *MI1, 1165 const MachineRegisterInfo *MRI) const { 1166 int Opcode = MI0->getOpcode(); 1167 if (Opcode == ARM::t2LDRpci || 1168 Opcode == ARM::t2LDRpci_pic || 1169 Opcode == ARM::tLDRpci || 1170 Opcode == ARM::tLDRpci_pic || 1171 Opcode == ARM::MOV_ga_dyn || 1172 Opcode == ARM::MOV_ga_pcrel || 1173 Opcode == ARM::MOV_ga_pcrel_ldr || 1174 Opcode == ARM::t2MOV_ga_dyn || 1175 Opcode == ARM::t2MOV_ga_pcrel) { 1176 if (MI1->getOpcode() != Opcode) 1177 return false; 1178 if (MI0->getNumOperands() != MI1->getNumOperands()) 1179 return false; 1180 1181 const MachineOperand &MO0 = MI0->getOperand(1); 1182 const MachineOperand &MO1 = MI1->getOperand(1); 1183 if (MO0.getOffset() != MO1.getOffset()) 1184 return false; 1185 1186 if (Opcode == ARM::MOV_ga_dyn || 1187 Opcode == ARM::MOV_ga_pcrel || 1188 Opcode == ARM::MOV_ga_pcrel_ldr || 1189 Opcode == ARM::t2MOV_ga_dyn || 1190 Opcode == ARM::t2MOV_ga_pcrel) 1191 // Ignore the PC labels. 
      return MO0.getGlobal() == MO1.getGlobal();

    const MachineFunction *MF = MI0->getParent()->getParent();
    const MachineConstantPool *MCP = MF->getConstantPool();
    int CPI0 = MO0.getIndex();
    int CPI1 = MO1.getIndex();
    const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
    const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
    bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
    bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
    if (isARMCP0 && isARMCP1) {
      ARMConstantPoolValue *ACPV0 =
        static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
      ARMConstantPoolValue *ACPV1 =
        static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
      return ACPV0->hasSameValue(ACPV1);
    } else if (!isARMCP0 && !isARMCP1) {
      return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
    }
    return false;
  } else if (Opcode == ARM::PICLDR) {
    if (MI1->getOpcode() != Opcode)
      return false;
    if (MI0->getNumOperands() != MI1->getNumOperands())
      return false;

    unsigned Addr0 = MI0->getOperand(1).getReg();
    unsigned Addr1 = MI1->getOperand(1).getReg();
    if (Addr0 != Addr1) {
      if (!MRI ||
          !TargetRegisterInfo::isVirtualRegister(Addr0) ||
          !TargetRegisterInfo::isVirtualRegister(Addr1))
        return false;

      // This assumes SSA form.
      MachineInstr *Def0 = MRI->getVRegDef(Addr0);
      MachineInstr *Def1 = MRI->getVRegDef(Addr1);
      // Check if the loaded values, e.g. a constantpool entry or a global
      // address, are the same.
      if (!produceSameValue(Def0, Def1, MRI))
        return false;
    }

    for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
      // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
      const MachineOperand &MO0 = MI0->getOperand(i);
      const MachineOperand &MO1 = MI1->getOperand(i);
      if (!MO0.isIdenticalTo(MO1))
        return false;
    }
    return true;
  }

  return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}

/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
/// determine if two loads are loading from the same base address. It should
/// only return true if the base pointers are the same and the only difference
/// between the two addresses is the offset. It also returns the offsets by
/// reference.
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
                                               int64_t &Offset1,
                                               int64_t &Offset2) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
    return false;

  switch (Load1->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRDi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRSHi12:
    break;
  }

  switch (Load2->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRDi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRSHi12:
    break;
  }

  // Check if base addresses and chain operands match.
  if (Load1->getOperand(0) != Load2->getOperand(0) ||
      Load1->getOperand(4) != Load2->getOperand(4))
    return false;

  // Index should be Reg0.
  if (Load1->getOperand(3) != Load2->getOperand(3))
    return false;

  // Determine the offsets.
  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
      isa<ConstantSDNode>(Load2->getOperand(1))) {
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
    Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
    return true;
  }

  return false;
}

/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
/// be scheduled together. On some targets if two loads are loading from
/// addresses in the same cache line, it's better if they are scheduled
/// together. This function takes two integers that represent the load offsets
/// from the common base address. It returns true if it decides it's desirable
/// to schedule the two loads together. "NumLoads" is the number of loads that
/// have already been scheduled after Load1.
bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                               int64_t Offset1, int64_t Offset2,
                                               unsigned NumLoads) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  assert(Offset2 > Offset1);

  if ((Offset2 - Offset1) / 8 > 64)
    return false;

  if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
    return false;  // FIXME: overly conservative?

  // Four loads in a row should be sufficient.
  if (NumLoads >= 3)
    return false;

  return true;
}

bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
                                            const MachineBasicBlock *MBB,
                                            const MachineFunction &MF) const {
  // Debug info is never a scheduling boundary. It's necessary to be explicit
  // due to the special treatment of IT instructions below, otherwise a
  // dbg_value followed by an IT will result in the IT instruction being
  // considered a scheduling hazard, which is wrong. It should be the actual
  // instruction preceding the dbg_value instruction(s), just like it is
  // when debug info is not present.
  if (MI->isDebugValue())
    return false;

  // Terminators and labels can't be scheduled around.
1362 if (MI->getDesc().isTerminator() || MI->isLabel()) 1363 return true; 1364 1365 // Treat the start of the IT block as a scheduling boundary, but schedule 1366 // t2IT along with all instructions following it. 1367 // FIXME: This is a big hammer. But the alternative is to add all potential 1368 // true and anti dependencies to IT block instructions as implicit operands 1369 // to the t2IT instruction. The added compile time and complexity does not 1370 // seem worth it. 1371 MachineBasicBlock::const_iterator I = MI; 1372 // Make sure to skip any dbg_value instructions 1373 while (++I != MBB->end() && I->isDebugValue()) 1374 ; 1375 if (I != MBB->end() && I->getOpcode() == ARM::t2IT) 1376 return true; 1377 1378 // Don't attempt to schedule around any instruction that defines 1379 // a stack-oriented pointer, as it's unlikely to be profitable. This 1380 // saves compile time, because it doesn't require every single 1381 // stack slot reference to depend on the instruction that does the 1382 // modification. 1383 if (MI->definesRegister(ARM::SP)) 1384 return true; 1385 1386 return false; 1387 } 1388 1389 bool ARMBaseInstrInfo:: 1390 isProfitableToIfCvt(MachineBasicBlock &MBB, 1391 unsigned NumCycles, unsigned ExtraPredCycles, 1392 const BranchProbability &Probability) const { 1393 if (!NumCycles) 1394 return false; 1395 1396 // Attempt to estimate the relative costs of predication versus branching. 1397 unsigned UnpredCost = Probability.getNumerator() * NumCycles; 1398 UnpredCost /= Probability.getDenominator(); 1399 UnpredCost += 1; // The branch itself 1400 UnpredCost += Subtarget.getMispredictionPenalty() / 10; 1401 1402 return (NumCycles + ExtraPredCycles) <= UnpredCost; 1403 } 1404 1405 bool ARMBaseInstrInfo:: 1406 isProfitableToIfCvt(MachineBasicBlock &TMBB, 1407 unsigned TCycles, unsigned TExtra, 1408 MachineBasicBlock &FMBB, 1409 unsigned FCycles, unsigned FExtra, 1410 const BranchProbability &Probability) const { 1411 if (!TCycles || !FCycles) 1412 return false; 1413 1414 // Attempt to estimate the relative costs of predication versus branching. 1415 unsigned TUnpredCost = Probability.getNumerator() * TCycles; 1416 TUnpredCost /= Probability.getDenominator(); 1417 1418 uint32_t Comp = Probability.getDenominator() - Probability.getNumerator(); 1419 unsigned FUnpredCost = Comp * FCycles; 1420 FUnpredCost /= Probability.getDenominator(); 1421 1422 unsigned UnpredCost = TUnpredCost + FUnpredCost; 1423 UnpredCost += 1; // The branch itself 1424 UnpredCost += Subtarget.getMispredictionPenalty() / 10; 1425 1426 return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost; 1427 } 1428 1429 /// getInstrPredicate - If instruction is predicated, returns its predicate 1430 /// condition, otherwise returns AL. It also returns the condition code 1431 /// register by reference. 1432 ARMCC::CondCodes 1433 llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) { 1434 int PIdx = MI->findFirstPredOperandIdx(); 1435 if (PIdx == -1) { 1436 PredReg = 0; 1437 return ARMCC::AL; 1438 } 1439 1440 PredReg = MI->getOperand(PIdx+1).getReg(); 1441 return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm(); 1442 } 1443 1444 1445 int llvm::getMatchingCondBranchOpcode(int Opc) { 1446 if (Opc == ARM::B) 1447 return ARM::Bcc; 1448 else if (Opc == ARM::tB) 1449 return ARM::tBcc; 1450 else if (Opc == ARM::t2B) 1451 return ARM::t2Bcc; 1452 1453 llvm_unreachable("Unknown unconditional branch opcode!"); 1454 return 0; 1455 } 1456 1457 1458 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. 
Whether the 1459 /// instruction is encoded with an 'S' bit is determined by the optional CPSR 1460 /// def operand. 1461 /// 1462 /// This will go away once we can teach tblgen how to set the optional CPSR def 1463 /// operand itself. 1464 struct AddSubFlagsOpcodePair { 1465 unsigned PseudoOpc; 1466 unsigned MachineOpc; 1467 }; 1468 1469 static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { 1470 {ARM::ADDSri, ARM::ADDri}, 1471 {ARM::ADDSrr, ARM::ADDrr}, 1472 {ARM::ADDSrsi, ARM::ADDrsi}, 1473 {ARM::ADDSrsr, ARM::ADDrsr}, 1474 1475 {ARM::SUBSri, ARM::SUBri}, 1476 {ARM::SUBSrr, ARM::SUBrr}, 1477 {ARM::SUBSrsi, ARM::SUBrsi}, 1478 {ARM::SUBSrsr, ARM::SUBrsr}, 1479 1480 {ARM::RSBSri, ARM::RSBri}, 1481 {ARM::RSBSrr, ARM::RSBrr}, 1482 {ARM::RSBSrsi, ARM::RSBrsi}, 1483 {ARM::RSBSrsr, ARM::RSBrsr}, 1484 1485 {ARM::t2ADDSri, ARM::t2ADDri}, 1486 {ARM::t2ADDSrr, ARM::t2ADDrr}, 1487 {ARM::t2ADDSrs, ARM::t2ADDrs}, 1488 1489 {ARM::t2SUBSri, ARM::t2SUBri}, 1490 {ARM::t2SUBSrr, ARM::t2SUBrr}, 1491 {ARM::t2SUBSrs, ARM::t2SUBrs}, 1492 1493 {ARM::t2RSBSri, ARM::t2RSBri}, 1494 {ARM::t2RSBSrs, ARM::t2RSBrs}, 1495 }; 1496 1497 unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) { 1498 static const int NPairs = 1499 sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair); 1500 for (AddSubFlagsOpcodePair *OpcPair = &AddSubFlagsOpcodeMap[0], 1501 *End = &AddSubFlagsOpcodeMap[NPairs]; OpcPair != End; ++OpcPair) { 1502 if (OldOpc == OpcPair->PseudoOpc) { 1503 return OpcPair->MachineOpc; 1504 } 1505 } 1506 return 0; 1507 } 1508 1509 void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, 1510 MachineBasicBlock::iterator &MBBI, DebugLoc dl, 1511 unsigned DestReg, unsigned BaseReg, int NumBytes, 1512 ARMCC::CondCodes Pred, unsigned PredReg, 1513 const ARMBaseInstrInfo &TII, unsigned MIFlags) { 1514 bool isSub = NumBytes < 0; 1515 if (isSub) NumBytes = -NumBytes; 1516 1517 while (NumBytes) { 1518 unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes); 1519 unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt); 1520 assert(ThisVal && "Didn't extract field correctly"); 1521 1522 // We will handle these bits from offset, clear them. 1523 NumBytes &= ~ThisVal; 1524 1525 assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?"); 1526 1527 // Build the new ADD / SUB. 1528 unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri; 1529 BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) 1530 .addReg(BaseReg, RegState::Kill).addImm(ThisVal) 1531 .addImm((unsigned)Pred).addReg(PredReg).addReg(0) 1532 .setMIFlags(MIFlags); 1533 BaseReg = DestReg; 1534 } 1535 } 1536 1537 bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, 1538 unsigned FrameReg, int &Offset, 1539 const ARMBaseInstrInfo &TII) { 1540 unsigned Opcode = MI.getOpcode(); 1541 const MCInstrDesc &Desc = MI.getDesc(); 1542 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); 1543 bool isSub = false; 1544 1545 // Memory operands in inline assembly always use AddrMode2. 1546 if (Opcode == ARM::INLINEASM) 1547 AddrMode = ARMII::AddrMode2; 1548 1549 if (Opcode == ARM::ADDri) { 1550 Offset += MI.getOperand(FrameRegIdx+1).getImm(); 1551 if (Offset == 0) { 1552 // Turn it into a move. 1553 MI.setDesc(TII.get(ARM::MOVr)); 1554 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 1555 MI.RemoveOperand(FrameRegIdx+1); 1556 Offset = 0; 1557 return true; 1558 } else if (Offset < 0) { 1559 Offset = -Offset; 1560 isSub = true; 1561 MI.setDesc(TII.get(ARM::SUBri)); 1562 } 1563 1564 // Common case: small offset, fits into instruction. 
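    // An ARM so_imm is an 8-bit value rotated right by an even amount, so
    // getSOImmVal succeeds for offsets like 0xFF, 0x3FC or 0xFF000000.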
    if (ARM_AM::getSOImmVal(Offset) != -1) {
      // Replace the FrameIndex with sp / fp
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
      MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
      Offset = 0;
      return true;
    }

    // Otherwise, pull as much of the immediate into this ADDri/SUBri
    // as possible.
    unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
    unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);

    // We will handle these bits from offset, clear them.
    Offset &= ~ThisImmVal;

    // Get the properly encoded SOImmVal field.
    assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
           "Bit extraction didn't work?");
    MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
  } else {
    unsigned ImmIdx = 0;
    int InstrOffs = 0;
    unsigned NumBits = 0;
    unsigned Scale = 1;
    switch (AddrMode) {
    case ARMII::AddrMode_i12: {
      ImmIdx = FrameRegIdx + 1;
      InstrOffs = MI.getOperand(ImmIdx).getImm();
      NumBits = 12;
      break;
    }
    case ARMII::AddrMode2: {
      ImmIdx = FrameRegIdx+2;
      InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 12;
      break;
    }
    case ARMII::AddrMode3: {
      ImmIdx = FrameRegIdx+2;
      InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      break;
    }
    case ARMII::AddrMode4:
    case ARMII::AddrMode6:
      // Can't fold any offset even if it's zero.
      return false;
    case ARMII::AddrMode5: {
      ImmIdx = FrameRegIdx+1;
      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      Scale = 4;
      break;
    }
    default:
      llvm_unreachable("Unsupported addressing mode!");
      break;
    }

    Offset += InstrOffs * Scale;
    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
    if (Offset < 0) {
      Offset = -Offset;
      isSub = true;
    }

    // Attempt to fold the address computation if the opcode has offset bits.
    if (NumBits > 0) {
      // Common case: small offset, fits into instruction.
      MachineOperand &ImmOp = MI.getOperand(ImmIdx);
      int ImmedOffset = Offset / Scale;
      unsigned Mask = (1 << NumBits) - 1;
      if ((unsigned)Offset <= Mask * Scale) {
        // Replace the FrameIndex with sp
        MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
        // FIXME: When addrmode2 goes away, this will simplify (like the
        // T2 version), as the LDR.i12 versions don't need the encoding
        // tricks for the offset value.
        if (isSub) {
          if (AddrMode == ARMII::AddrMode_i12)
            ImmedOffset = -ImmedOffset;
          else
            ImmedOffset |= 1 << NumBits;
        }
        ImmOp.ChangeToImmediate(ImmedOffset);
        Offset = 0;
        return true;
      }

      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
      ImmedOffset = ImmedOffset & Mask;
      if (isSub) {
        if (AddrMode == ARMII::AddrMode_i12)
          ImmedOffset = -ImmedOffset;
        else
          ImmedOffset |= 1 << NumBits;
      }
      ImmOp.ChangeToImmediate(ImmedOffset);
      Offset &= ~(Mask*Scale);
    }
  }

  Offset = (isSub) ?
-Offset : Offset; 1675 return Offset == 0; 1676 } 1677 1678 bool ARMBaseInstrInfo:: 1679 AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask, 1680 int &CmpValue) const { 1681 switch (MI->getOpcode()) { 1682 default: break; 1683 case ARM::CMPri: 1684 case ARM::t2CMPri: 1685 SrcReg = MI->getOperand(0).getReg(); 1686 CmpMask = ~0; 1687 CmpValue = MI->getOperand(1).getImm(); 1688 return true; 1689 case ARM::TSTri: 1690 case ARM::t2TSTri: 1691 SrcReg = MI->getOperand(0).getReg(); 1692 CmpMask = MI->getOperand(1).getImm(); 1693 CmpValue = 0; 1694 return true; 1695 } 1696 1697 return false; 1698 } 1699 1700 /// isSuitableForMask - Identify a suitable 'and' instruction that 1701 /// operates on the given source register and applies the same mask 1702 /// as a 'tst' instruction. Provide a limited look-through for copies. 1703 /// When successful, MI will hold the found instruction. 1704 static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg, 1705 int CmpMask, bool CommonUse) { 1706 switch (MI->getOpcode()) { 1707 case ARM::ANDri: 1708 case ARM::t2ANDri: 1709 if (CmpMask != MI->getOperand(2).getImm()) 1710 return false; 1711 if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg()) 1712 return true; 1713 break; 1714 case ARM::COPY: { 1715 // Walk down one instruction which is potentially an 'and'. 1716 const MachineInstr &Copy = *MI; 1717 MachineBasicBlock::iterator AND( 1718 llvm::next(MachineBasicBlock::iterator(MI))); 1719 if (AND == MI->getParent()->end()) return false; 1720 MI = AND; 1721 return isSuitableForMask(MI, Copy.getOperand(0).getReg(), 1722 CmpMask, true); 1723 } 1724 } 1725 1726 return false; 1727 } 1728 1729 /// OptimizeCompareInstr - Convert the instruction supplying the argument to the 1730 /// comparison into one that sets the zero bit in the flags register. 1731 bool ARMBaseInstrInfo:: 1732 OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, 1733 int CmpValue, const MachineRegisterInfo *MRI) const { 1734 if (CmpValue != 0) 1735 return false; 1736 1737 MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg); 1738 if (llvm::next(DI) != MRI->def_end()) 1739 // Only support one definition. 1740 return false; 1741 1742 MachineInstr *MI = &*DI; 1743 1744 // Masked compares sometimes use the same register as the corresponding 'and'. 1745 if (CmpMask != ~0) { 1746 if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) { 1747 MI = 0; 1748 for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg), 1749 UE = MRI->use_end(); UI != UE; ++UI) { 1750 if (UI->getParent() != CmpInstr->getParent()) continue; 1751 MachineInstr *PotentialAND = &*UI; 1752 if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true)) 1753 continue; 1754 MI = PotentialAND; 1755 break; 1756 } 1757 if (!MI) return false; 1758 } 1759 } 1760 1761 // Conservatively refuse to convert an instruction which isn't in the same BB 1762 // as the comparison. 1763 if (MI->getParent() != CmpInstr->getParent()) 1764 return false; 1765 1766 // Check that CPSR isn't set between the comparison instruction and the one we 1767 // want to change. 1768 MachineBasicBlock::const_iterator I = CmpInstr, E = MI, 1769 B = MI->getParent()->begin(); 1770 1771 // Early exit if CmpInstr is at the beginning of the BB. 
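// (Illustrative example of the rewrite this enables, assuming nothing reads
// or writes CPSR in between:
//   sub r0, r1, r2; ...; cmp r0, #0
// becomes
//   subs r0, r1, r2; ...
// with the cmp erased.)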
1772 if (I == B) return false; 1773 1774 --I; 1775 for (; I != E; --I) { 1776 const MachineInstr &Instr = *I; 1777 1778 for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) { 1779 const MachineOperand &MO = Instr.getOperand(IO); 1780 if (!MO.isReg()) continue; 1781 1782 // This instruction modifies or uses CPSR after the one we want to 1783 // change. We can't do this transformation. 1784 if (MO.getReg() == ARM::CPSR) 1785 return false; 1786 } 1787 1788 if (I == B) 1789 // The 'and' is below the comparison instruction. 1790 return false; 1791 } 1792 1793 // Set the "zero" bit in CPSR. 1794 switch (MI->getOpcode()) { 1795 default: break; 1796 case ARM::RSBrr: 1797 case ARM::RSBri: 1798 case ARM::RSCrr: 1799 case ARM::RSCri: 1800 case ARM::ADDrr: 1801 case ARM::ADDri: 1802 case ARM::ADCrr: 1803 case ARM::ADCri: 1804 case ARM::SUBrr: 1805 case ARM::SUBri: 1806 case ARM::SBCrr: 1807 case ARM::SBCri: 1808 case ARM::t2RSBri: 1809 case ARM::t2ADDrr: 1810 case ARM::t2ADDri: 1811 case ARM::t2ADCrr: 1812 case ARM::t2ADCri: 1813 case ARM::t2SUBrr: 1814 case ARM::t2SUBri: 1815 case ARM::t2SBCrr: 1816 case ARM::t2SBCri: 1817 case ARM::ANDrr: 1818 case ARM::ANDri: 1819 case ARM::t2ANDrr: 1820 case ARM::t2ANDri: 1821 case ARM::ORRrr: 1822 case ARM::ORRri: 1823 case ARM::t2ORRrr: 1824 case ARM::t2ORRri: 1825 case ARM::EORrr: 1826 case ARM::EORri: 1827 case ARM::t2EORrr: 1828 case ARM::t2EORri: { 1829 // Scan forward for the use of CPSR; if the condition code found requires 1830 // checking of the V bit, then this is not safe to do. If we can't find the 1831 // CPSR use (i.e. it is used in another block), then it's not safe to perform 1832 // the optimization. 1833 bool isSafe = false; 1834 I = CmpInstr; 1835 E = MI->getParent()->end(); 1836 while (!isSafe && ++I != E) { 1837 const MachineInstr &Instr = *I; 1838 for (unsigned IO = 0, EO = Instr.getNumOperands(); 1839 !isSafe && IO != EO; ++IO) { 1840 const MachineOperand &MO = Instr.getOperand(IO); 1841 if (!MO.isReg() || MO.getReg() != ARM::CPSR) 1842 continue; 1843 if (MO.isDef()) { 1844 isSafe = true; 1845 break; 1846 } 1847 // The condition code operand is the one immediately before CPSR. 1848 ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm(); 1849 switch (CC) { 1850 default: 1851 isSafe = true; 1852 break; 1853 case ARMCC::VS: 1854 case ARMCC::VC: 1855 case ARMCC::GE: 1856 case ARMCC::LT: 1857 case ARMCC::GT: 1858 case ARMCC::LE: 1859 return false; 1860 } 1861 } 1862 } 1863 1864 if (!isSafe) 1865 return false; 1866 1867 // Toggle the optional operand to CPSR. 1868 MI->getOperand(5).setReg(ARM::CPSR); 1869 MI->getOperand(5).setIsDef(true); 1870 CmpInstr->eraseFromParent(); 1871 return true; 1872 } 1873 } 1874 1875 return false; 1876 } 1877 1878 bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, 1879 MachineInstr *DefMI, unsigned Reg, 1880 MachineRegisterInfo *MRI) const { 1881 // Fold large immediates into add, sub, or, xor.
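// (Illustrative example: a MOVi32imm of 0x00FF00FF whose only use is an ORRrr
// can be replaced by two ORRri instructions, one per rotated-8-bit part
// (#0x000000FF and #0x00FF0000), with the MOVi32imm erased; 0x00FF00FF is a
// valid two-part SO-immediate.)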
1882 unsigned DefOpc = DefMI->getOpcode(); 1883 if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm) 1884 return false; 1885 if (!DefMI->getOperand(1).isImm()) 1886 // Could be t2MOVi32imm <ga:xx> 1887 return false; 1888 1889 if (!MRI->hasOneNonDBGUse(Reg)) 1890 return false; 1891 1892 unsigned UseOpc = UseMI->getOpcode(); 1893 unsigned NewUseOpc = 0; 1894 uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm(); 1895 uint32_t SOImmValV1 = 0, SOImmValV2 = 0; 1896 bool Commute = false; 1897 switch (UseOpc) { 1898 default: return false; 1899 case ARM::SUBrr: 1900 case ARM::ADDrr: 1901 case ARM::ORRrr: 1902 case ARM::EORrr: 1903 case ARM::t2SUBrr: 1904 case ARM::t2ADDrr: 1905 case ARM::t2ORRrr: 1906 case ARM::t2EORrr: { 1907 Commute = UseMI->getOperand(2).getReg() != Reg; 1908 switch (UseOpc) { 1909 default: break; 1910 case ARM::SUBrr: { 1911 if (Commute) 1912 return false; 1913 ImmVal = -ImmVal; 1914 NewUseOpc = ARM::SUBri; 1915 // Fallthrough 1916 } 1917 case ARM::ADDrr: 1918 case ARM::ORRrr: 1919 case ARM::EORrr: { 1920 if (!ARM_AM::isSOImmTwoPartVal(ImmVal)) 1921 return false; 1922 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal); 1923 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal); 1924 switch (UseOpc) { 1925 default: break; 1926 case ARM::ADDrr: NewUseOpc = ARM::ADDri; break; 1927 case ARM::ORRrr: NewUseOpc = ARM::ORRri; break; 1928 case ARM::EORrr: NewUseOpc = ARM::EORri; break; 1929 } 1930 break; 1931 } 1932 case ARM::t2SUBrr: { 1933 if (Commute) 1934 return false; 1935 ImmVal = -ImmVal; 1936 NewUseOpc = ARM::t2SUBri; 1937 // Fallthrough 1938 } 1939 case ARM::t2ADDrr: 1940 case ARM::t2ORRrr: 1941 case ARM::t2EORrr: { 1942 if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal)) 1943 return false; 1944 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal); 1945 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal); 1946 switch (UseOpc) { 1947 default: break; 1948 case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break; 1949 case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break; 1950 case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break; 1951 } 1952 break; 1953 } 1954 } 1955 } 1956 } 1957 1958 unsigned OpIdx = Commute ? 2 : 1; 1959 unsigned Reg1 = UseMI->getOperand(OpIdx).getReg(); 1960 bool isKill = UseMI->getOperand(OpIdx).isKill(); 1961 unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg)); 1962 AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(), 1963 *UseMI, UseMI->getDebugLoc(), 1964 get(NewUseOpc), NewReg) 1965 .addReg(Reg1, getKillRegState(isKill)) 1966 .addImm(SOImmValV1))); 1967 UseMI->setDesc(get(NewUseOpc)); 1968 UseMI->getOperand(1).setReg(NewReg); 1969 UseMI->getOperand(1).setIsKill(); 1970 UseMI->getOperand(2).ChangeToImmediate(SOImmValV2); 1971 DefMI->eraseFromParent(); 1972 return true; 1973 } 1974 1975 unsigned 1976 ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, 1977 const MachineInstr *MI) const { 1978 if (!ItinData || ItinData->isEmpty()) 1979 return 1; 1980 1981 const MCInstrDesc &Desc = MI->getDesc(); 1982 unsigned Class = Desc.getSchedClass(); 1983 unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; 1984 if (UOps) 1985 return UOps; 1986 1987 unsigned Opc = MI->getOpcode(); 1988 switch (Opc) { 1989 default: 1990 llvm_unreachable("Unexpected multi-uops instruction!"); 1991 break; 1992 case ARM::VLDMQIA: 1993 case ARM::VSTMQIA: 1994 return 2; 1995 1996 // The number of uOps for load / store multiple is determined by the number 1997 // of registers.
1998 // 1999 // On Cortex-A8, each pair of register loads / stores can be scheduled on the 2000 // same cycle. The scheduling for the first load / store must be done 2001 // separately by assuming the address is not 64-bit aligned. 2002 // 2003 // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address 2004 // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON 2005 // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1. 2006 case ARM::VLDMDIA: 2007 case ARM::VLDMDIA_UPD: 2008 case ARM::VLDMDDB_UPD: 2009 case ARM::VLDMSIA: 2010 case ARM::VLDMSIA_UPD: 2011 case ARM::VLDMSDB_UPD: 2012 case ARM::VSTMDIA: 2013 case ARM::VSTMDIA_UPD: 2014 case ARM::VSTMDDB_UPD: 2015 case ARM::VSTMSIA: 2016 case ARM::VSTMSIA_UPD: 2017 case ARM::VSTMSDB_UPD: { 2018 unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands(); 2019 return (NumRegs / 2) + (NumRegs % 2) + 1; 2020 } 2021 2022 case ARM::LDMIA_RET: 2023 case ARM::LDMIA: 2024 case ARM::LDMDA: 2025 case ARM::LDMDB: 2026 case ARM::LDMIB: 2027 case ARM::LDMIA_UPD: 2028 case ARM::LDMDA_UPD: 2029 case ARM::LDMDB_UPD: 2030 case ARM::LDMIB_UPD: 2031 case ARM::STMIA: 2032 case ARM::STMDA: 2033 case ARM::STMDB: 2034 case ARM::STMIB: 2035 case ARM::STMIA_UPD: 2036 case ARM::STMDA_UPD: 2037 case ARM::STMDB_UPD: 2038 case ARM::STMIB_UPD: 2039 case ARM::tLDMIA: 2040 case ARM::tLDMIA_UPD: 2041 case ARM::tSTMIA_UPD: 2042 case ARM::tPOP_RET: 2043 case ARM::tPOP: 2044 case ARM::tPUSH: 2045 case ARM::t2LDMIA_RET: 2046 case ARM::t2LDMIA: 2047 case ARM::t2LDMDB: 2048 case ARM::t2LDMIA_UPD: 2049 case ARM::t2LDMDB_UPD: 2050 case ARM::t2STMIA: 2051 case ARM::t2STMDB: 2052 case ARM::t2STMIA_UPD: 2053 case ARM::t2STMDB_UPD: { 2054 unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1; 2055 if (Subtarget.isCortexA8()) { 2056 if (NumRegs < 4) 2057 return 2; 2058 // 4 registers would be issued: 2, 2. 2059 // 5 registers would be issued: 2, 2, 1. 2060 UOps = (NumRegs / 2); 2061 if (NumRegs % 2) 2062 ++UOps; 2063 return UOps; 2064 } else if (Subtarget.isCortexA9()) { 2065 UOps = (NumRegs / 2); 2066 // If there is an odd number of registers or if it's not 64-bit aligned, 2067 // then it takes an extra AGU (Address Generation Unit) cycle. 2068 if ((NumRegs % 2) || 2069 !MI->hasOneMemOperand() || 2070 (*MI->memoperands_begin())->getAlignment() < 8) 2071 ++UOps; 2072 return UOps; 2073 } else { 2074 // Assume the worst. 2075 return NumRegs; 2076 } 2077 } 2078 } 2079 } 2080 2081 int 2082 ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, 2083 const MCInstrDesc &DefMCID, 2084 unsigned DefClass, 2085 unsigned DefIdx, unsigned DefAlign) const { 2086 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; 2087 if (RegNo <= 0) 2088 // Def is the address writeback. 2089 return ItinData->getOperandCycle(DefClass, DefIdx); 2090 2091 int DefCycle; 2092 if (Subtarget.isCortexA8()) { 2093 // (regno / 2) + (regno % 2) + 1 2094 DefCycle = RegNo / 2 + 1; 2095 if (RegNo % 2) 2096 ++DefCycle; 2097 } else if (Subtarget.isCortexA9()) { 2098 DefCycle = RegNo; 2099 bool isSLoad = false; 2100 2101 switch (DefMCID.getOpcode()) { 2102 default: break; 2103 case ARM::VLDMSIA: 2104 case ARM::VLDMSIA_UPD: 2105 case ARM::VLDMSDB_UPD: 2106 isSLoad = true; 2107 break; 2108 } 2109 2110 // If there is an odd number of 'S' registers or if it's not 64-bit aligned, 2111 // then it takes an extra cycle. 2112 if ((isSLoad && (RegNo % 2)) || DefAlign < 8) 2113 ++DefCycle; 2114 } else { 2115 // Assume the worst.
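// (Presumably one register delivered per cycle plus a couple of cycles of
// load-result latency; the exact model for unknown subtargets is a guess.)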
2116 DefCycle = RegNo + 2; 2117 } 2118 2119 return DefCycle; 2120 } 2121 2122 int 2123 ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, 2124 const MCInstrDesc &DefMCID, 2125 unsigned DefClass, 2126 unsigned DefIdx, unsigned DefAlign) const { 2127 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; 2128 if (RegNo <= 0) 2129 // Def is the address writeback. 2130 return ItinData->getOperandCycle(DefClass, DefIdx); 2131 2132 int DefCycle; 2133 if (Subtarget.isCortexA8()) { 2134 // 4 registers would be issued: 1, 2, 1. 2135 // 5 registers would be issued: 1, 2, 2. 2136 DefCycle = RegNo / 2; 2137 if (DefCycle < 1) 2138 DefCycle = 1; 2139 // Result latency is issue cycle + 2: E2. 2140 DefCycle += 2; 2141 } else if (Subtarget.isCortexA9()) { 2142 DefCycle = (RegNo / 2); 2143 // If there is an odd number of registers or if it's not 64-bit aligned, 2144 // then it takes an extra AGU (Address Generation Unit) cycle. 2145 if ((RegNo % 2) || DefAlign < 8) 2146 ++DefCycle; 2147 // Result latency is AGU cycles + 2. 2148 DefCycle += 2; 2149 } else { 2150 // Assume the worst. 2151 DefCycle = RegNo + 2; 2152 } 2153 2154 return DefCycle; 2155 } 2156 2157 int 2158 ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, 2159 const MCInstrDesc &UseMCID, 2160 unsigned UseClass, 2161 unsigned UseIdx, unsigned UseAlign) const { 2162 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; 2163 if (RegNo <= 0) 2164 return ItinData->getOperandCycle(UseClass, UseIdx); 2165 2166 int UseCycle; 2167 if (Subtarget.isCortexA8()) { 2168 // (regno / 2) + (regno % 2) + 1 2169 UseCycle = RegNo / 2 + 1; 2170 if (RegNo % 2) 2171 ++UseCycle; 2172 } else if (Subtarget.isCortexA9()) { 2173 UseCycle = RegNo; 2174 bool isSStore = false; 2175 2176 switch (UseMCID.getOpcode()) { 2177 default: break; 2178 case ARM::VSTMSIA: 2179 case ARM::VSTMSIA_UPD: 2180 case ARM::VSTMSDB_UPD: 2181 isSStore = true; 2182 break; 2183 } 2184 2185 // If there is an odd number of 'S' registers or if it's not 64-bit aligned, 2186 // then it takes an extra cycle. 2187 if ((isSStore && (RegNo % 2)) || UseAlign < 8) 2188 ++UseCycle; 2189 } else { 2190 // Assume the worst. 2191 UseCycle = RegNo + 2; 2192 } 2193 2194 return UseCycle; 2195 } 2196 2197 int 2198 ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, 2199 const MCInstrDesc &UseMCID, 2200 unsigned UseClass, 2201 unsigned UseIdx, unsigned UseAlign) const { 2202 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; 2203 if (RegNo <= 0) 2204 return ItinData->getOperandCycle(UseClass, UseIdx); 2205 2206 int UseCycle; 2207 if (Subtarget.isCortexA8()) { 2208 UseCycle = RegNo / 2; 2209 if (UseCycle < 2) 2210 UseCycle = 2; 2211 // Read in E3. 2212 UseCycle += 2; 2213 } else if (Subtarget.isCortexA9()) { 2214 UseCycle = (RegNo / 2); 2215 // If there is an odd number of registers or if it's not 64-bit aligned, 2216 // then it takes an extra AGU (Address Generation Unit) cycle. 2217 if ((RegNo % 2) || UseAlign < 8) 2218 ++UseCycle; 2219 } else { 2220 // Assume the worst.
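// (i.e. assume the register is read in the earliest possible stage, which
// yields the most conservative def-to-use latency in getOperandLatency below.)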
2221 UseCycle = 1; 2222 } 2223 return UseCycle; 2224 } 2225 2226 int 2227 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, 2228 const MCInstrDesc &DefMCID, 2229 unsigned DefIdx, unsigned DefAlign, 2230 const MCInstrDesc &UseMCID, 2231 unsigned UseIdx, unsigned UseAlign) const { 2232 unsigned DefClass = DefMCID.getSchedClass(); 2233 unsigned UseClass = UseMCID.getSchedClass(); 2234 2235 if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands()) 2236 return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); 2237 2238 // This may be a def / use of a variable_ops instruction, the operand 2239 // latency might be determinable dynamically. Let the target try to 2240 // figure it out. 2241 int DefCycle = -1; 2242 bool LdmBypass = false; 2243 switch (DefMCID.getOpcode()) { 2244 default: 2245 DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); 2246 break; 2247 2248 case ARM::VLDMDIA: 2249 case ARM::VLDMDIA_UPD: 2250 case ARM::VLDMDDB_UPD: 2251 case ARM::VLDMSIA: 2252 case ARM::VLDMSIA_UPD: 2253 case ARM::VLDMSDB_UPD: 2254 DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); 2255 break; 2256 2257 case ARM::LDMIA_RET: 2258 case ARM::LDMIA: 2259 case ARM::LDMDA: 2260 case ARM::LDMDB: 2261 case ARM::LDMIB: 2262 case ARM::LDMIA_UPD: 2263 case ARM::LDMDA_UPD: 2264 case ARM::LDMDB_UPD: 2265 case ARM::LDMIB_UPD: 2266 case ARM::tLDMIA: 2267 case ARM::tLDMIA_UPD: 2268 case ARM::tPUSH: 2269 case ARM::t2LDMIA_RET: 2270 case ARM::t2LDMIA: 2271 case ARM::t2LDMDB: 2272 case ARM::t2LDMIA_UPD: 2273 case ARM::t2LDMDB_UPD: 2274 LdmBypass = 1; 2275 DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); 2276 break; 2277 } 2278 2279 if (DefCycle == -1) 2280 // We can't seem to determine the result latency of the def, assume it's 2. 2281 DefCycle = 2; 2282 2283 int UseCycle = -1; 2284 switch (UseMCID.getOpcode()) { 2285 default: 2286 UseCycle = ItinData->getOperandCycle(UseClass, UseIdx); 2287 break; 2288 2289 case ARM::VSTMDIA: 2290 case ARM::VSTMDIA_UPD: 2291 case ARM::VSTMDDB_UPD: 2292 case ARM::VSTMSIA: 2293 case ARM::VSTMSIA_UPD: 2294 case ARM::VSTMSDB_UPD: 2295 UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); 2296 break; 2297 2298 case ARM::STMIA: 2299 case ARM::STMDA: 2300 case ARM::STMDB: 2301 case ARM::STMIB: 2302 case ARM::STMIA_UPD: 2303 case ARM::STMDA_UPD: 2304 case ARM::STMDB_UPD: 2305 case ARM::STMIB_UPD: 2306 case ARM::tSTMIA_UPD: 2307 case ARM::tPOP_RET: 2308 case ARM::tPOP: 2309 case ARM::t2STMIA: 2310 case ARM::t2STMDB: 2311 case ARM::t2STMIA_UPD: 2312 case ARM::t2STMDB_UPD: 2313 UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); 2314 break; 2315 } 2316 2317 if (UseCycle == -1) 2318 // Assume it's read in the first stage. 2319 UseCycle = 1; 2320 2321 UseCycle = DefCycle - UseCycle + 1; 2322 if (UseCycle > 0) { 2323 if (LdmBypass) { 2324 // It's a variable_ops instruction so we can't use DefIdx here. Just use 2325 // first def operand. 
2326 if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1, 2327 UseClass, UseIdx)) 2328 --UseCycle; 2329 } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx, 2330 UseClass, UseIdx)) { 2331 --UseCycle; 2332 } 2333 } 2334 2335 return UseCycle; 2336 } 2337 2338 int 2339 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, 2340 const MachineInstr *DefMI, unsigned DefIdx, 2341 const MachineInstr *UseMI, unsigned UseIdx) const { 2342 if (DefMI->isCopyLike() || DefMI->isInsertSubreg() || 2343 DefMI->isRegSequence() || DefMI->isImplicitDef()) 2344 return 1; 2345 2346 const MCInstrDesc &DefMCID = DefMI->getDesc(); 2347 if (!ItinData || ItinData->isEmpty()) 2348 return DefMCID.mayLoad() ? 3 : 1; 2349 2350 const MCInstrDesc &UseMCID = UseMI->getDesc(); 2351 const MachineOperand &DefMO = DefMI->getOperand(DefIdx); 2352 if (DefMO.getReg() == ARM::CPSR) { 2353 if (DefMI->getOpcode() == ARM::FMSTAT) { 2354 // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) 2355 return Subtarget.isCortexA9() ? 1 : 20; 2356 } 2357 2358 // CPSR set and branch can be paired in the same cycle. 2359 if (UseMCID.isBranch()) 2360 return 0; 2361 } 2362 2363 unsigned DefAlign = DefMI->hasOneMemOperand() 2364 ? (*DefMI->memoperands_begin())->getAlignment() : 0; 2365 unsigned UseAlign = UseMI->hasOneMemOperand() 2366 ? (*UseMI->memoperands_begin())->getAlignment() : 0; 2367 int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, 2368 UseMCID, UseIdx, UseAlign); 2369 2370 if (Latency > 1 && 2371 (Subtarget.isCortexA8() || Subtarget.isCortexA9())) { 2372 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] 2373 // variants are one cycle cheaper. 2374 switch (DefMCID.getOpcode()) { 2375 default: break; 2376 case ARM::LDRrs: 2377 case ARM::LDRBrs: { 2378 unsigned ShOpVal = DefMI->getOperand(3).getImm(); 2379 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 2380 if (ShImm == 0 || 2381 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) 2382 --Latency; 2383 break; 2384 } 2385 case ARM::t2LDRs: 2386 case ARM::t2LDRBs: 2387 case ARM::t2LDRHs: 2388 case ARM::t2LDRSHs: { 2389 // Thumb2 mode: lsl only. 
2390 unsigned ShAmt = DefMI->getOperand(3).getImm(); 2391 if (ShAmt == 0 || ShAmt == 2) 2392 --Latency; 2393 break; 2394 } 2395 } 2396 } 2397 2398 if (DefAlign < 8 && Subtarget.isCortexA9()) 2399 switch (DefMCID.getOpcode()) { 2400 default: break; 2401 case ARM::VLD1q8: 2402 case ARM::VLD1q16: 2403 case ARM::VLD1q32: 2404 case ARM::VLD1q64: 2405 case ARM::VLD1q8_UPD: 2406 case ARM::VLD1q16_UPD: 2407 case ARM::VLD1q32_UPD: 2408 case ARM::VLD1q64_UPD: 2409 case ARM::VLD2d8: 2410 case ARM::VLD2d16: 2411 case ARM::VLD2d32: 2412 case ARM::VLD2q8: 2413 case ARM::VLD2q16: 2414 case ARM::VLD2q32: 2415 case ARM::VLD2d8_UPD: 2416 case ARM::VLD2d16_UPD: 2417 case ARM::VLD2d32_UPD: 2418 case ARM::VLD2q8_UPD: 2419 case ARM::VLD2q16_UPD: 2420 case ARM::VLD2q32_UPD: 2421 case ARM::VLD3d8: 2422 case ARM::VLD3d16: 2423 case ARM::VLD3d32: 2424 case ARM::VLD1d64T: 2425 case ARM::VLD3d8_UPD: 2426 case ARM::VLD3d16_UPD: 2427 case ARM::VLD3d32_UPD: 2428 case ARM::VLD1d64T_UPD: 2429 case ARM::VLD3q8_UPD: 2430 case ARM::VLD3q16_UPD: 2431 case ARM::VLD3q32_UPD: 2432 case ARM::VLD4d8: 2433 case ARM::VLD4d16: 2434 case ARM::VLD4d32: 2435 case ARM::VLD1d64Q: 2436 case ARM::VLD4d8_UPD: 2437 case ARM::VLD4d16_UPD: 2438 case ARM::VLD4d32_UPD: 2439 case ARM::VLD1d64Q_UPD: 2440 case ARM::VLD4q8_UPD: 2441 case ARM::VLD4q16_UPD: 2442 case ARM::VLD4q32_UPD: 2443 case ARM::VLD1DUPq8: 2444 case ARM::VLD1DUPq16: 2445 case ARM::VLD1DUPq32: 2446 case ARM::VLD1DUPq8_UPD: 2447 case ARM::VLD1DUPq16_UPD: 2448 case ARM::VLD1DUPq32_UPD: 2449 case ARM::VLD2DUPd8: 2450 case ARM::VLD2DUPd16: 2451 case ARM::VLD2DUPd32: 2452 case ARM::VLD2DUPd8_UPD: 2453 case ARM::VLD2DUPd16_UPD: 2454 case ARM::VLD2DUPd32_UPD: 2455 case ARM::VLD4DUPd8: 2456 case ARM::VLD4DUPd16: 2457 case ARM::VLD4DUPd32: 2458 case ARM::VLD4DUPd8_UPD: 2459 case ARM::VLD4DUPd16_UPD: 2460 case ARM::VLD4DUPd32_UPD: 2461 case ARM::VLD1LNd8: 2462 case ARM::VLD1LNd16: 2463 case ARM::VLD1LNd32: 2464 case ARM::VLD1LNd8_UPD: 2465 case ARM::VLD1LNd16_UPD: 2466 case ARM::VLD1LNd32_UPD: 2467 case ARM::VLD2LNd8: 2468 case ARM::VLD2LNd16: 2469 case ARM::VLD2LNd32: 2470 case ARM::VLD2LNq16: 2471 case ARM::VLD2LNq32: 2472 case ARM::VLD2LNd8_UPD: 2473 case ARM::VLD2LNd16_UPD: 2474 case ARM::VLD2LNd32_UPD: 2475 case ARM::VLD2LNq16_UPD: 2476 case ARM::VLD2LNq32_UPD: 2477 case ARM::VLD4LNd8: 2478 case ARM::VLD4LNd16: 2479 case ARM::VLD4LNd32: 2480 case ARM::VLD4LNq16: 2481 case ARM::VLD4LNq32: 2482 case ARM::VLD4LNd8_UPD: 2483 case ARM::VLD4LNd16_UPD: 2484 case ARM::VLD4LNd32_UPD: 2485 case ARM::VLD4LNq16_UPD: 2486 case ARM::VLD4LNq32_UPD: 2487 // If the address is not 64-bit aligned, the latencies of these 2488 // instructions increases by one. 2489 ++Latency; 2490 break; 2491 } 2492 2493 return Latency; 2494 } 2495 2496 int 2497 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, 2498 SDNode *DefNode, unsigned DefIdx, 2499 SDNode *UseNode, unsigned UseIdx) const { 2500 if (!DefNode->isMachineOpcode()) 2501 return 1; 2502 2503 const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode()); 2504 2505 if (isZeroCost(DefMCID.Opcode)) 2506 return 0; 2507 2508 if (!ItinData || ItinData->isEmpty()) 2509 return DefMCID.mayLoad() ? 3 : 1; 2510 2511 if (!UseNode->isMachineOpcode()) { 2512 int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx); 2513 if (Subtarget.isCortexA9()) 2514 return Latency <= 2 ? 1 : Latency - 1; 2515 else 2516 return Latency <= 3 ? 
1 : Latency - 2; 2517 } 2518 2519 const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode()); 2520 const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode); 2521 unsigned DefAlign = !DefMN->memoperands_empty() 2522 ? (*DefMN->memoperands_begin())->getAlignment() : 0; 2523 const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode); 2524 unsigned UseAlign = !UseMN->memoperands_empty() 2525 ? (*UseMN->memoperands_begin())->getAlignment() : 0; 2526 int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, 2527 UseMCID, UseIdx, UseAlign); 2528 2529 if (Latency > 1 && 2530 (Subtarget.isCortexA8() || Subtarget.isCortexA9())) { 2531 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] 2532 // variants are one cycle cheaper. 2533 switch (DefMCID.getOpcode()) { 2534 default: break; 2535 case ARM::LDRrs: 2536 case ARM::LDRBrs: { 2537 unsigned ShOpVal = 2538 cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); 2539 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 2540 if (ShImm == 0 || 2541 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) 2542 --Latency; 2543 break; 2544 } 2545 case ARM::t2LDRs: 2546 case ARM::t2LDRBs: 2547 case ARM::t2LDRHs: 2548 case ARM::t2LDRSHs: { 2549 // Thumb2 mode: lsl only. 2550 unsigned ShAmt = 2551 cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); 2552 if (ShAmt == 0 || ShAmt == 2) 2553 --Latency; 2554 break; 2555 } 2556 } 2557 } 2558 2559 if (DefAlign < 8 && Subtarget.isCortexA9()) 2560 switch (DefMCID.getOpcode()) { 2561 default: break; 2562 case ARM::VLD1q8Pseudo: 2563 case ARM::VLD1q16Pseudo: 2564 case ARM::VLD1q32Pseudo: 2565 case ARM::VLD1q64Pseudo: 2566 case ARM::VLD1q8Pseudo_UPD: 2567 case ARM::VLD1q16Pseudo_UPD: 2568 case ARM::VLD1q32Pseudo_UPD: 2569 case ARM::VLD1q64Pseudo_UPD: 2570 case ARM::VLD2d8Pseudo: 2571 case ARM::VLD2d16Pseudo: 2572 case ARM::VLD2d32Pseudo: 2573 case ARM::VLD2q8Pseudo: 2574 case ARM::VLD2q16Pseudo: 2575 case ARM::VLD2q32Pseudo: 2576 case ARM::VLD2d8Pseudo_UPD: 2577 case ARM::VLD2d16Pseudo_UPD: 2578 case ARM::VLD2d32Pseudo_UPD: 2579 case ARM::VLD2q8Pseudo_UPD: 2580 case ARM::VLD2q16Pseudo_UPD: 2581 case ARM::VLD2q32Pseudo_UPD: 2582 case ARM::VLD3d8Pseudo: 2583 case ARM::VLD3d16Pseudo: 2584 case ARM::VLD3d32Pseudo: 2585 case ARM::VLD1d64TPseudo: 2586 case ARM::VLD3d8Pseudo_UPD: 2587 case ARM::VLD3d16Pseudo_UPD: 2588 case ARM::VLD3d32Pseudo_UPD: 2589 case ARM::VLD1d64TPseudo_UPD: 2590 case ARM::VLD3q8Pseudo_UPD: 2591 case ARM::VLD3q16Pseudo_UPD: 2592 case ARM::VLD3q32Pseudo_UPD: 2593 case ARM::VLD3q8oddPseudo: 2594 case ARM::VLD3q16oddPseudo: 2595 case ARM::VLD3q32oddPseudo: 2596 case ARM::VLD3q8oddPseudo_UPD: 2597 case ARM::VLD3q16oddPseudo_UPD: 2598 case ARM::VLD3q32oddPseudo_UPD: 2599 case ARM::VLD4d8Pseudo: 2600 case ARM::VLD4d16Pseudo: 2601 case ARM::VLD4d32Pseudo: 2602 case ARM::VLD1d64QPseudo: 2603 case ARM::VLD4d8Pseudo_UPD: 2604 case ARM::VLD4d16Pseudo_UPD: 2605 case ARM::VLD4d32Pseudo_UPD: 2606 case ARM::VLD1d64QPseudo_UPD: 2607 case ARM::VLD4q8Pseudo_UPD: 2608 case ARM::VLD4q16Pseudo_UPD: 2609 case ARM::VLD4q32Pseudo_UPD: 2610 case ARM::VLD4q8oddPseudo: 2611 case ARM::VLD4q16oddPseudo: 2612 case ARM::VLD4q32oddPseudo: 2613 case ARM::VLD4q8oddPseudo_UPD: 2614 case ARM::VLD4q16oddPseudo_UPD: 2615 case ARM::VLD4q32oddPseudo_UPD: 2616 case ARM::VLD1DUPq8Pseudo: 2617 case ARM::VLD1DUPq16Pseudo: 2618 case ARM::VLD1DUPq32Pseudo: 2619 case ARM::VLD1DUPq8Pseudo_UPD: 2620 case ARM::VLD1DUPq16Pseudo_UPD: 2621 case ARM::VLD1DUPq32Pseudo_UPD: 2622 case 
ARM::VLD2DUPd8Pseudo: 2623 case ARM::VLD2DUPd16Pseudo: 2624 case ARM::VLD2DUPd32Pseudo: 2625 case ARM::VLD2DUPd8Pseudo_UPD: 2626 case ARM::VLD2DUPd16Pseudo_UPD: 2627 case ARM::VLD2DUPd32Pseudo_UPD: 2628 case ARM::VLD4DUPd8Pseudo: 2629 case ARM::VLD4DUPd16Pseudo: 2630 case ARM::VLD4DUPd32Pseudo: 2631 case ARM::VLD4DUPd8Pseudo_UPD: 2632 case ARM::VLD4DUPd16Pseudo_UPD: 2633 case ARM::VLD4DUPd32Pseudo_UPD: 2634 case ARM::VLD1LNq8Pseudo: 2635 case ARM::VLD1LNq16Pseudo: 2636 case ARM::VLD1LNq32Pseudo: 2637 case ARM::VLD1LNq8Pseudo_UPD: 2638 case ARM::VLD1LNq16Pseudo_UPD: 2639 case ARM::VLD1LNq32Pseudo_UPD: 2640 case ARM::VLD2LNd8Pseudo: 2641 case ARM::VLD2LNd16Pseudo: 2642 case ARM::VLD2LNd32Pseudo: 2643 case ARM::VLD2LNq16Pseudo: 2644 case ARM::VLD2LNq32Pseudo: 2645 case ARM::VLD2LNd8Pseudo_UPD: 2646 case ARM::VLD2LNd16Pseudo_UPD: 2647 case ARM::VLD2LNd32Pseudo_UPD: 2648 case ARM::VLD2LNq16Pseudo_UPD: 2649 case ARM::VLD2LNq32Pseudo_UPD: 2650 case ARM::VLD4LNd8Pseudo: 2651 case ARM::VLD4LNd16Pseudo: 2652 case ARM::VLD4LNd32Pseudo: 2653 case ARM::VLD4LNq16Pseudo: 2654 case ARM::VLD4LNq32Pseudo: 2655 case ARM::VLD4LNd8Pseudo_UPD: 2656 case ARM::VLD4LNd16Pseudo_UPD: 2657 case ARM::VLD4LNd32Pseudo_UPD: 2658 case ARM::VLD4LNq16Pseudo_UPD: 2659 case ARM::VLD4LNq32Pseudo_UPD: 2660 // If the address is not 64-bit aligned, the latencies of these 2661 // instructions increases by one. 2662 ++Latency; 2663 break; 2664 } 2665 2666 return Latency; 2667 } 2668 2669 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, 2670 const MachineInstr *MI, 2671 unsigned *PredCost) const { 2672 if (MI->isCopyLike() || MI->isInsertSubreg() || 2673 MI->isRegSequence() || MI->isImplicitDef()) 2674 return 1; 2675 2676 if (!ItinData || ItinData->isEmpty()) 2677 return 1; 2678 2679 const MCInstrDesc &MCID = MI->getDesc(); 2680 unsigned Class = MCID.getSchedClass(); 2681 unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; 2682 if (PredCost && MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) 2683 // When predicated, CPSR is an additional source operand for CPSR updating 2684 // instructions, this apparently increases their latencies. 2685 *PredCost = 1; 2686 if (UOps) 2687 return ItinData->getStageLatency(Class); 2688 return getNumMicroOps(ItinData, MI); 2689 } 2690 2691 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, 2692 SDNode *Node) const { 2693 if (!Node->isMachineOpcode()) 2694 return 1; 2695 2696 if (!ItinData || ItinData->isEmpty()) 2697 return 1; 2698 2699 unsigned Opcode = Node->getMachineOpcode(); 2700 switch (Opcode) { 2701 default: 2702 return ItinData->getStageLatency(get(Opcode).getSchedClass()); 2703 case ARM::VLDMQIA: 2704 case ARM::VSTMQIA: 2705 return 2; 2706 } 2707 } 2708 2709 bool ARMBaseInstrInfo:: 2710 hasHighOperandLatency(const InstrItineraryData *ItinData, 2711 const MachineRegisterInfo *MRI, 2712 const MachineInstr *DefMI, unsigned DefIdx, 2713 const MachineInstr *UseMI, unsigned UseIdx) const { 2714 unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask; 2715 unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask; 2716 if (Subtarget.isCortexA8() && 2717 (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP)) 2718 // CortexA8 VFP instructions are not pipelined. 2719 return true; 2720 2721 // Hoist VFP / NEON instructions with 4 or higher latency. 
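// (This hook is typically queried by the machine-LICM pass when deciding
// whether hoisting a definition out of a loop is worthwhile.)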
2722 int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); 2723 if (Latency <= 3) 2724 return false; 2725 return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON || 2726 UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON; 2727 } 2728 2729 bool ARMBaseInstrInfo:: 2730 hasLowDefLatency(const InstrItineraryData *ItinData, 2731 const MachineInstr *DefMI, unsigned DefIdx) const { 2732 if (!ItinData || ItinData->isEmpty()) 2733 return false; 2734 2735 unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask; 2736 if (DDomain == ARMII::DomainGeneral) { 2737 unsigned DefClass = DefMI->getDesc().getSchedClass(); 2738 int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); 2739 return (DefCycle != -1 && DefCycle <= 2); 2740 } 2741 return false; 2742 } 2743 2744 bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI, 2745 StringRef &ErrInfo) const { 2746 if (convertAddSubFlagsOpcode(MI->getOpcode())) { 2747 ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG"; 2748 return false; 2749 } 2750 return true; 2751 } 2752 2753 bool 2754 ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, 2755 unsigned &AddSubOpc, 2756 bool &NegAcc, bool &HasLane) const { 2757 DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode); 2758 if (I == MLxEntryMap.end()) 2759 return false; 2760 2761 const ARM_MLxEntry &Entry = ARM_MLxTable[I->second]; 2762 MulOpc = Entry.MulOpc; 2763 AddSubOpc = Entry.AddSubOpc; 2764 NegAcc = Entry.NegAcc; 2765 HasLane = Entry.HasLane; 2766 return true; 2767 } 2768 2769 //===----------------------------------------------------------------------===// 2770 // Execution domains. 2771 //===----------------------------------------------------------------------===// 2772 // 2773 // Some instructions go down the NEON pipeline, some go down the VFP pipeline, 2774 // and some can go down both. The vmov instructions go down the VFP pipeline, 2775 // but they can be changed to vorr equivalents that are executed by the NEON 2776 // pipeline. 2777 // 2778 // We use the following execution domain numbering: 2779 // 2780 enum ARMExeDomain { 2781 ExeGeneric = 0, 2782 ExeVFP = 1, 2783 ExeNEON = 2 2784 }; 2785 // 2786 // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h 2787 // 2788 std::pair<uint16_t, uint16_t> 2789 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { 2790 // VMOVD is a VFP instruction, but can be changed to NEON if it isn't 2791 // predicated. 2792 if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) 2793 return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); 2794 2795 // No other instructions can be swizzled, so just determine their domain. 2796 unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask; 2797 2798 if (Domain & ARMII::DomainNEON) 2799 return std::make_pair(ExeNEON, 0); 2800 2801 // Certain instructions can go either way on Cortex-A8. 2802 // Treat them as NEON instructions. 2803 if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8()) 2804 return std::make_pair(ExeNEON, 0); 2805 2806 if (Domain & ARMII::DomainVFP) 2807 return std::make_pair(ExeVFP, 0); 2808 2809 return std::make_pair(ExeGeneric, 0); 2810 } 2811 2812 void 2813 ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { 2814 // We only know how to change VMOVD into VORR. 2815 assert(MI->getOpcode() == ARM::VMOVD && "Can only swizzle VMOVD"); 2816 if (Domain != ExeNEON) 2817 return; 2818 2819 // Zap the predicate operands. 
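// They are removed here and a default predicate is re-added after the extra
// source operand below, because VORRd takes its predicate after two source
// operands whereas VMOVD has only one.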
2820 assert(!isPredicated(MI) && "Cannot predicate a VORRd"); 2821 MI->RemoveOperand(3); 2822 MI->RemoveOperand(2); 2823 2824 // Change to a VORRd which requires two identical use operands. 2825 MI->setDesc(get(ARM::VORRd)); 2826 2827 // Add the extra source operand and new predicates. 2828 // This will go before any implicit ops. 2829 AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1))); 2830 } 2831
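// Illustrative effect of the VMOVD -> VORRd swizzle above:
//   vmov.f64 d0, d1   ==>   vorr d0, d1, d1
// (a VORR whose two source operands are identical is just a register move,
// but it executes in the NEON domain).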