//===- AArch64InstrInfo.cpp - AArch64 Instruction Information ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineCombinerPattern.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      RI(STI.getTargetTriple()), Subtarget(STI) {}

/// GetInstSizeInBytes - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  const MachineBasicBlock &MBB = *MI->getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  if (MI->getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);

  const MCInstrDesc &Desc = MI->getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    return 4;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    return 0;
  }

  llvm_unreachable("GetInstSizeInBytes() - Unable to determine insn size");
}

static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}

// Branch analysis.
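//
// Illustrative note (not from the original source): parseCondBranch() above
// encodes a folded compare-and-branch such as "cbnz w0, <bb>" as
//   Cond[0] = -1              (marker: not a plain Bcc)
//   Cond[1] = AArch64::CBNZW  (the branch opcode)
//   Cond[2] = the w0 operand  (the register being tested)
// and tbz/tbnz additionally append the bit number as Cond[3]; a plain
// "b.ne <bb>" is encoded as just the condition code in Cond[0].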
bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
        // Return now; the only remaining terminator is an unconditional
        // branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an
  // unconditional branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}

bool AArch64InstrInfo::ReverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}

unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return 0;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return 0;
    --I;
  }
  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin())
    return 1;
  --I;
  if (!isCondBranchOpcode(I->getOpcode()))
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, DebugLoc DL, MachineBasicBlock *TBB,
    const SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use addOperand instead of addReg to keep the flags.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}

unsigned AArch64InstrInfo::InsertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);
    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
  return 2;
}

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
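// For example (an illustrative note, not from the original comment):
//   add %x, %y, #1    folds to csinc (SrcOpNum 1, NewVReg = %y)
//   orn %x, xzr, %y   i.e. not %y, folds to csinv (SrcOpNum 2)
//   sub %x, xzr, %y   i.e. neg %y, folds to csneg (SrcOpNum 2)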
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to ADDXri and ADDWri.
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to SUBXrr and SUBWrr.
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}

bool AArch64InstrInfo::canInsertSelect(
    const MachineBasicBlock &MBB, const SmallVectorImpl<MachineOperand> &Cond,
    unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
    int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}

void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I, DebugLoc DL,
                                    unsigned DstReg,
                                    const SmallVectorImpl<MachineOperand> &Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
  if (!Subtarget.isCortexA57() && !Subtarget.isCortexA53())
    return MI->isAsCheapAsAMove();

  switch (MI->getOpcode()) {
  default:
    return false;

  // add/sub on register without shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (MI->getOperand(3).getImm() == 0);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}

bool
AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
                                                  MachineInstr *MIb,
                                                  AliasAnalysis *AA) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned BaseRegA = 0, BaseRegB = 0;
  int OffsetA = 0, OffsetB = 0;
  int WidthA = 0, WidthB = 0;

  assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
         "MIa must be a store or a load");
  assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
         "MIb must be a store or a load");

  if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
      MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, offset from the base register and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // base registers are identical, and the offset of a lower memory access +
  // the width doesn't overlap the offset of a higher memory access,
  // then the memory accesses are different.
  if (getLdStBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
      getLdStBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    if (BaseRegA == BaseRegB) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI->getOperand(1).getReg();
    SrcReg2 = MI->getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI->getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: CmpValue is reduced to 0 or 1 here, losing the actual
    // immediate value.
    CmpValue = (MI->getOperand(2).getImm() != 0);
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI->getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of uint64_t will be lost.
    // In fact it causes a bug in spec2006-483.xalancbmk.
    // CmpValue is only used to compare with zero in OptimizeCompareInstr.
    CmpValue = (AArch64_AM::decodeLogicalImmediate(
                    MI->getOperand(2).getImm(),
                    MI->getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0);
    return true;
  }

  return false;
}

static bool UpdateOperandRegClass(MachineInstr *Instr) {
  MachineBasicBlock *MBB = Instr->getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr->getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr->getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    unsigned Reg = MO.getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}

/// \brief Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible for doing the actual
/// substitution and legality checking.
static unsigned convertFlagSettingOpcode(const MachineInstr *MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI->definesRegister(AArch64::WZR) || MI->definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI->getOpcode()) {
  default:
    return MI->getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}

/// True when the condition code could be modified on the instruction
/// trace starting at \p From and ending at \p To.
static bool modifiesConditionCode(MachineInstr *From, MachineInstr *To,
                                  const bool CheckOnlyCCWrites,
                                  const TargetRegisterInfo *TRI) {
  // We iterate backward starting at \p To until we hit \p From.
  MachineBasicBlock::iterator I = To, E = From, B = To->getParent()->begin();

  // Early exit if To is at the beginning of the BB.
  if (I == B)
    return true;

  // Check whether the definition of SrcReg is in the same basic block as
  // Compare. If not, assume the condition code gets modified on some path.
  if (To->getParent() != From->getParent())
    return true;

  // Check that NZCV isn't set on the trace.
  for (--I; I != E; --I) {
    const MachineInstr &Instr = *I;

    if (Instr.modifiesRegister(AArch64::NZCV, TRI) ||
        (!CheckOnlyCCWrites && Instr.readsRegister(AArch64::NZCV, TRI)))
      // This instruction modifies or uses NZCV after the one we want to
      // change.
      return true;
    if (I == B)
      // We currently don't allow the instruction trace to cross basic
      // block boundaries.
      return true;
  }
  return false;
}

/// optimizeCompareInstr - Convert the instruction supplying the argument to
/// the comparison into one that sets the zero bit in the flags register.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (Cmp_NZCV != -1) {
    if (CmpInstr->definesRegister(AArch64::WZR) ||
        CmpInstr->definesRegister(AArch64::XZR)) {
      CmpInstr->eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr->getOpcode();
    unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr->setDesc(MCID);
    CmpInstr->RemoveOperand(Cmp_NZCV);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operand register classes are incompatible!");
    return true;
  }

  // Continue only if we have a "ri" where immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in the
  // analyzeCompare function.
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
    return false;

  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  bool CheckOnlyCCWrites = false;
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  if (modifiesConditionCode(MI, CmpInstr, CheckOnlyCCWrites, TRI))
    return false;

  unsigned NewOpc = MI->getOpcode();
  switch (MI->getOpcode()) {
  default:
    return false;
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    break;
  case AArch64::ADDWrr: NewOpc = AArch64::ADDSWrr; break;
  case AArch64::ADDWri: NewOpc = AArch64::ADDSWri; break;
  case AArch64::ADDXrr: NewOpc = AArch64::ADDSXrr; break;
  case AArch64::ADDXri: NewOpc = AArch64::ADDSXri; break;
  case AArch64::ADCWr:  NewOpc = AArch64::ADCSWr;  break;
  case AArch64::ADCXr:  NewOpc = AArch64::ADCSXr;  break;
  case AArch64::SUBWrr: NewOpc = AArch64::SUBSWrr; break;
  case AArch64::SUBWri: NewOpc = AArch64::SUBSWri; break;
  case AArch64::SUBXrr: NewOpc = AArch64::SUBSXrr; break;
  case AArch64::SUBXri: NewOpc = AArch64::SUBSXri; break;
  case AArch64::SBCWr:  NewOpc = AArch64::SBCSWr;  break;
  case AArch64::SBCXr:  NewOpc = AArch64::SBCSXr;  break;
  case AArch64::ANDWri: NewOpc = AArch64::ANDSWri; break;
  case AArch64::ANDXri: NewOpc = AArch64::ANDSXri; break;
  }

  // Scan forward for the use of NZCV.
  // If the condition code of a user requires checking the V bit, the
  // transformation is not safe to do.
  // It is safe to remove CmpInstr if NZCV is redefined or killed.
  // If we are done with the basic block, we need to check whether NZCV is
  // live-out.
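  // Illustrative note (not from the original comment): the V (signed
  // overflow) bit computed by the substituted adds/subs can differ from the
  // V bit the original compare-against-zero would have produced, so any
  // later user of a V-dependent condition (VS/VC, or the signed GE/LT/GT/LE)
  // makes the transformation unsafe; the scan below rejects those cases.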
  bool IsSafe = false;
  for (MachineBasicBlock::iterator I = CmpInstr,
                                   E = CmpInstr->getParent()->end();
       !IsSafe && ++I != E;) {
    const MachineInstr &Instr = *I;
    for (unsigned IO = 0, EO = Instr.getNumOperands(); !IsSafe && IO != EO;
         ++IO) {
      const MachineOperand &MO = Instr.getOperand(IO);
      if (MO.isRegMask() && MO.clobbersPhysReg(AArch64::NZCV)) {
        IsSafe = true;
        break;
      }
      if (!MO.isReg() || MO.getReg() != AArch64::NZCV)
        continue;
      if (MO.isDef()) {
        IsSafe = true;
        break;
      }

      // Decode the condition code.
      unsigned Opc = Instr.getOpcode();
      AArch64CC::CondCode CC;
      switch (Opc) {
      default:
        return false;
      case AArch64::Bcc:
        CC = (AArch64CC::CondCode)Instr.getOperand(IO - 2).getImm();
        break;
      case AArch64::CSINVWr:
      case AArch64::CSINVXr:
      case AArch64::CSINCWr:
      case AArch64::CSINCXr:
      case AArch64::CSELWr:
      case AArch64::CSELXr:
      case AArch64::CSNEGWr:
      case AArch64::CSNEGXr:
      case AArch64::FCSELSrrr:
      case AArch64::FCSELDrrr:
        CC = (AArch64CC::CondCode)Instr.getOperand(IO - 1).getImm();
        break;
      }

      // It is not safe to remove the Compare instruction if Overflow(V) is
      // used.
      switch (CC) {
      default:
        // NZCV can be used multiple times, we should continue.
        break;
      case AArch64CC::VS:
      case AArch64CC::VC:
      case AArch64CC::GE:
      case AArch64CC::LT:
      case AArch64CC::GT:
      case AArch64CC::LE:
        return false;
      }
    }
  }

  // If NZCV is not killed nor re-defined, we should check whether it is
  // live-out. If it is live-out, do not optimize.
  if (!IsSafe) {
    MachineBasicBlock *ParentBlock = CmpInstr->getParent();
    for (auto *MBB : ParentBlock->successors())
      if (MBB->isLiveIn(AArch64::NZCV))
        return false;
  }

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr->eraseFromParent();
  bool succeeded = UpdateOperandRegClass(MI);
  (void)succeeded;
  assert(succeeded && "Some operand register classes are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}

bool
AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  if (MI->getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    return false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();
  unsigned Reg = MI->getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI->memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI->memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Large) {
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
        .addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
        .addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
        .addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
        .addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI->memoperands_begin());
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, LoFlags)
        .addMemOperand(*MI->memoperands_begin());
  }

  MBB.erase(MI);

  return true;
}

/// Return true if this instruction has a shifted register operand with a
/// non-zero shift value (i.e. anything other than LSL #0).
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrs:
  case AArch64::ADDSXrs:
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::CRC32Brr:
  case AArch64::CRC32CBrr:
  case AArch64::CRC32CHrr:
  case AArch64::CRC32CWrr:
  case AArch64::CRC32CXrr:
  case AArch64::CRC32Hrr:
  case AArch64::CRC32Wrr:
  case AArch64::CRC32Xrr:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
  case AArch64::SUBSWrs:
  case AArch64::SUBSXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    if (MI->getOperand(3).isImm()) {
      unsigned val = MI->getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }
  return false;
}

/// Return true if this instruction has an extended register operand with a
/// non-zero extend/shift encoding.
bool AArch64InstrInfo::hasExtendedReg(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrx:
  case AArch64::ADDSXrx64:
  case AArch64::ADDWrx:
  case AArch64::ADDXrx:
  case AArch64::ADDXrx64:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrx:
  case AArch64::SUBSXrx64:
  case AArch64::SUBWrx:
  case AArch64::SUBXrx:
  case AArch64::SUBXrx64:
    if (MI->getOperand(3).isImm()) {
      unsigned val = MI->getOperand(3).getImm();
      return (val != 0);
    }
    break;
  }

  return false;
}

// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
    if (MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) {
      assert(MI->getDesc().getNumOperands() == 3 &&
             MI->getOperand(2).getImm() == 0 && "invalid MOVZi operands");
      return true;
    }
    break;
  case AArch64::ANDWri: // and Rd, Rzr, #imm
    return MI->getOperand(1).getReg() == AArch64::WZR;
  case AArch64::ANDXri:
    return MI->getOperand(1).getReg() == AArch64::XZR;
  case TargetOpcode::COPY:
    return MI->getOperand(1).getReg() == AArch64::WZR;
  }
  return false;
}

// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
    unsigned DstReg = MI->getOperand(0).getReg();
    return (AArch64::GPR32RegClass.contains(DstReg) ||
            AArch64::GPR64RegClass.contains(DstReg));
  }
  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
    if (MI->getOperand(1).getReg() == AArch64::XZR) {
      assert(MI->getDesc().getNumOperands() == 4 &&
             MI->getOperand(3).getImm() == 0 && "invalid ORRrs operands");
      return true;
    }
    break;
  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
    if (MI->getOperand(2).getImm() == 0) {
      assert(MI->getDesc().getNumOperands() == 4 &&
             MI->getOperand(3).getImm() == 0 && "invalid ADDXri operands");
      return true;
    }
    break;
  }
  return false;
}

// Return true if this instruction simply renames a floating-point register
// without modifying bits.
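// For example (illustrative): "orr v0.16b, v1.16b, v1.16b" renames q1 to q0
// without modifying any bits, and is matched by the ORRv16i8 case below.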
bool AArch64InstrInfo::isFPRCopy(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case TargetOpcode::COPY: {
    // FPR64 copies will be lowered to ORR.16b
    unsigned DstReg = MI->getOperand(0).getReg();
    return (AArch64::FPR64RegClass.contains(DstReg) ||
            AArch64::FPR128RegClass.contains(DstReg));
  }
  case AArch64::ORRv16i8:
    if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
      assert(MI->getDesc().getNumOperands() == 3 && MI->getOperand(0).isReg() &&
             "invalid ORRv16i8 operands");
      return true;
    }
    break;
  }
  return false;
}

unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                               int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
    if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                              int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::STRWui:
  case AArch64::STRXui:
  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
    if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }
  return 0;
}

/// Return true if this load/store scales or extends its register offset.
/// This refers to scaling a dynamic index as opposed to scaled immediates.
/// MI should be a memory op that allows scaled addressing.
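/// For example (illustrative): "ldr x0, [x1, x2, lsl #3]" scales its register
/// offset and "ldr w0, [x1, w2, sxtw]" extends it, whereas a plain
/// "ldr x0, [x1, x2]" (UXTX, no shift) does neither.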
bool AArch64InstrInfo::isScaledAddr(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::LDRBBroW:
  case AArch64::LDRBroW:
  case AArch64::LDRDroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRHroW:
  case AArch64::LDRQroW:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
  case AArch64::LDRSroW:
  case AArch64::LDRWroW:
  case AArch64::LDRXroW:
  case AArch64::STRBBroW:
  case AArch64::STRBroW:
  case AArch64::STRDroW:
  case AArch64::STRHHroW:
  case AArch64::STRHroW:
  case AArch64::STRQroW:
  case AArch64::STRSroW:
  case AArch64::STRWroW:
  case AArch64::STRXroW:
  case AArch64::LDRBBroX:
  case AArch64::LDRBroX:
  case AArch64::LDRDroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRHroX:
  case AArch64::LDRQroX:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRWroX:
  case AArch64::LDRXroX:
  case AArch64::STRBBroX:
  case AArch64::STRBroX:
  case AArch64::STRDroX:
  case AArch64::STRHHroX:
  case AArch64::STRHroX:
  case AArch64::STRQroX:
  case AArch64::STRSroX:
  case AArch64::STRWroX:
  case AArch64::STRXroX:

    unsigned Val = MI->getOperand(3).getImm();
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
    return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
  }
  return false;
}

/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const {
  assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
         "Too many target MO flags");
  for (auto *MM : MI->memoperands()) {
    if (MM->getFlags() &
        (MOSuppressPair << MachineMemOperand::MOTargetStartBit)) {
      return true;
    }
  }
  return false;
}

/// Set a flag on the first MachineMemOperand to suppress pairing.
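/// (This sets the same flag that isLdStPairSuppressed() above tests.)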
void AArch64InstrInfo::suppressLdStPair(MachineInstr *MI) const {
  if (MI->memoperands_empty())
    return;

  assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
         "Too many target MO flags");
  (*MI->memoperands_begin())
      ->setFlags(MOSuppressPair << MachineMemOperand::MOTargetStartBit);
}

bool
AArch64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
                                       unsigned &Offset,
                                       const TargetRegisterInfo *TRI) const {
  switch (LdSt->getOpcode()) {
  default:
    return false;
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
    if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
      return false;
    BaseReg = LdSt->getOperand(1).getReg();
    MachineFunction &MF = *LdSt->getParent()->getParent();
    unsigned Width = getRegClass(LdSt->getDesc(), 0, TRI, MF)->getSize();
    Offset = LdSt->getOperand(2).getImm() * Width;
    return true;
  }
}

bool AArch64InstrInfo::getLdStBaseRegImmOfsWidth(
    MachineInstr *LdSt, unsigned &BaseReg, int &Offset, int &Width,
    const TargetRegisterInfo *TRI) const {
  // Handle only loads/stores with base register followed by immediate offset.
  if (LdSt->getNumOperands() != 3)
    return false;
  if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
    return false;

  // Offset is calculated as the immediate operand multiplied by the scaling
  // factor. Unscaled instructions have scaling factor set to 1.
  int Scale = 0;
  switch (LdSt->getOpcode()) {
  default:
    return false;
  case AArch64::LDURQi:
  case AArch64::STURQi:
    Width = 16;
    Scale = 1;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    Width = 8;
    Scale = 1;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    Width = 4;
    Scale = 1;
    break;
  case AArch64::LDURHi:
  case AArch64::LDURHHi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::STURHi:
  case AArch64::STURHHi:
    Width = 2;
    Scale = 1;
    break;
  case AArch64::LDURBi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::STURBi:
  case AArch64::STURBBi:
    Width = 1;
    Scale = 1;
    break;
  case AArch64::LDRXui:
  case AArch64::STRXui:
    Scale = Width = 8;
    break;
  case AArch64::LDRWui:
  case AArch64::STRWui:
    Scale = Width = 4;
    break;
  case AArch64::LDRBui:
  case AArch64::STRBui:
    Scale = Width = 1;
    break;
  case AArch64::LDRHui:
  case AArch64::STRHui:
    Scale = Width = 2;
    break;
  case AArch64::LDRSui:
  case AArch64::STRSui:
    Scale = Width = 4;
    break;
  case AArch64::LDRDui:
  case AArch64::STRDui:
    Scale = Width = 8;
    break;
  case AArch64::LDRQui:
  case AArch64::STRQui:
    Scale = Width = 16;
    break;
  case AArch64::LDRBBui:
  case AArch64::STRBBui:
    Scale = Width = 1;
    break;
  case AArch64::LDRHHui:
  case AArch64::STRHHui:
    Scale = Width = 2;
    break;
  }

  BaseReg = LdSt->getOperand(1).getReg();
  Offset = LdSt->getOperand(2).getImm() * Scale;
  return true;
}

/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getLdStBaseRegImmOfs returns true.
bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
                                          MachineInstr *SecondLdSt,
                                          unsigned NumLoads) const {
  // Only cluster up to a single pair.
  if (NumLoads > 1)
    return false;
  if (FirstLdSt->getOpcode() != SecondLdSt->getOpcode())
    return false;
  // getLdStBaseRegImmOfs guarantees that operand 2 is an immediate.
  unsigned Ofs1 = FirstLdSt->getOperand(2).getImm();
  // Allow 6 bits of positive range.
  if (Ofs1 > 64)
    return false;
  // The caller should already have ordered First/SecondLdSt by offset.
  unsigned Ofs2 = SecondLdSt->getOperand(2).getImm();
  return Ofs1 + 1 == Ofs2;
}

bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
                                              MachineInstr *Second) const {
  // Cyclone can fuse CMN, CMP followed by Bcc.

  // FIXME: B0 can also fuse:
  // AND, BIC, ORN, ORR, or EOR (optional S) followed by Bcc or CBZ or CBNZ.
  if (Second->getOpcode() != AArch64::Bcc)
    return false;
  switch (First->getOpcode()) {
  default:
    return false;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::ANDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
  case AArch64::ANDSXri:
    return true;
  }
}

MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
    MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
    const MDNode *Expr, DebugLoc DL) const {
  MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
                                .addFrameIndex(FrameIx)
                                .addImm(0)
                                .addImm(Offset)
                                .addMetadata(Var)
                                .addMetadata(Expr);
  return &*MIB;
}

static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
                                            unsigned Reg, unsigned SubIdx,
                                            unsigned State,
                                            const TargetRegisterInfo *TRI) {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  // We really want the positive remainder mod 32 here; that happens to be
  // easily obtainable with a mask.
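  // Worked example (illustrative): for a 3-register D-tuple copy from D1 to
  // D2, (2 - 1) & 0x1f == 1, which is < 3, so a forward copy (D2<-D1,
  // D3<-D2, D4<-D3) would read D2 and D3 after they had already been
  // overwritten; copyPhysRegTuple() below copies backwards in that case.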
  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
}

void AArch64InstrInfo::copyPhysRegTuple(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
    unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
    llvm::ArrayRef<unsigned> Indices) const {
  assert(Subtarget.hasNEON() &&
         "Unexpected register copy without NEON");
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
  unsigned NumRegs = Indices.size();

  int SubReg = 0, End = NumRegs, Incr = 1;
  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
    SubReg = NumRegs - 1;
    End = -1;
    Incr = -1;
  }

  for (; SubReg != End; SubReg += Incr) {
    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
  }
}

void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
  if (AArch64::GPR32spRegClass.contains(DestReg) &&
      (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
    const TargetRegisterInfo *TRI = &getRegisterInfo();

    if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
      // If either operand is WSP, expand to ADD #0.
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
        unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
                                                     &AArch64::GPR64spRegClass);
        unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
                                                    &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers. This may upset
        // the register scavenger and machine verifier, so we need to indicate
        // that we are reading an undefined value from SrcRegX, but a proper
        // value from SrcReg.
        BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
            .addReg(SrcRegX, RegState::Undef)
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc))
            .addImm(0)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
      }
    } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
      BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      if (Subtarget.hasZeroCycleRegMove()) {
        // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
        unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
                                                     &AArch64::GPR64spRegClass);
        unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
                                                    &AArch64::GPR64spRegClass);
        // This instruction is reading and writing X registers. This may upset
        // the register scavenger and machine verifier, so we need to indicate
        // that we are reading an undefined value from SrcRegX, but a proper
        // value from SrcReg.
        BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
            .addReg(AArch64::XZR)
            .addReg(SrcRegX, RegState::Undef)
            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
      } else {
        // Otherwise, expand to ORR WZR.
        BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
            .addReg(AArch64::WZR)
            .addReg(SrcReg, getKillRegState(KillSrc));
      }
    }
    return;
  }

  if (AArch64::GPR64spRegClass.contains(DestReg) &&
      (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
    if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
      // If either operand is SP, expand to ADD #0.
      BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
      BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
          .addImm(0)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    } else {
      // Otherwise, expand to ORR XZR.
      BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
          .addReg(AArch64::XZR)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  // Copy a DDDD register quad by copying the individual sub-registers.
  if (AArch64::DDDDRegClass.contains(DestReg) &&
      AArch64::DDDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
                                        AArch64::dsub2, AArch64::dsub3 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DDD register triple by copying the individual sub-registers.
  if (AArch64::DDDRegClass.contains(DestReg) &&
      AArch64::DDDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
                                        AArch64::dsub2 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a DD register pair by copying the individual sub-registers.
  if (AArch64::DDRegClass.contains(DestReg) &&
      AArch64::DDRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
                     Indices);
    return;
  }

  // Copy a QQQQ register quad by copying the individual sub-registers.
  if (AArch64::QQQQRegClass.contains(DestReg) &&
      AArch64::QQQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
                                        AArch64::qsub2, AArch64::qsub3 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQQ register triple by copying the individual sub-registers.
  if (AArch64::QQQRegClass.contains(DestReg) &&
      AArch64::QQQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
                                        AArch64::qsub2 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  // Copy a QQ register pair by copying the individual sub-registers.
  if (AArch64::QQRegClass.contains(DestReg) &&
      AArch64::QQRegClass.contains(SrcReg)) {
    static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 };
    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
                     Indices);
    return;
  }

  if (AArch64::FPR128RegClass.contains(DestReg) &&
      AArch64::FPR128RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::STRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .addReg(AArch64::SP)
          .addImm(-16);
      BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
          .addReg(AArch64::SP, RegState::Define)
          .addReg(DestReg, RegState::Define)
          .addReg(AArch64::SP)
          .addImm(16);
    }
    return;
  }

  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR16RegClass.contains(DestReg) &&
      AArch64::FPR16RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR8RegClass.contains(DestReg) &&
      AArch64::FPR8RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR16RegClass.contains(DestReg) &&
      AArch64::FPR16RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  if (AArch64::FPR8RegClass.contains(DestReg) &&
      AArch64::FPR8RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR128RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
                                      &AArch64::FPR128RegClass);
      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
          .addReg(SrcReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    } else {
      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
                                       &AArch64::FPR32RegClass);
      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
                                      &AArch64::FPR32RegClass);
      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }
    return;
  }

  // Copies between GPR64 and FPR64.
  if (AArch64::FPR64RegClass.contains(DestReg) &&
      AArch64::GPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR64RegClass.contains(DestReg) &&
      AArch64::FPR64RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  // Copies between GPR32 and FPR32.
  if (AArch64::FPR32RegClass.contains(DestReg) &&
      AArch64::GPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }
  if (AArch64::GPR32RegClass.contains(DestReg) &&
      AArch64::FPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (DestReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MSR))
        .addImm(AArch64SysReg::NZCV)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
    return;
  }

  if (SrcReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
    BuildMI(MBB, I, DL, get(AArch64::MRS))
        .addReg(DestReg)
        .addImm(AArch64SysReg::NZCV)
        .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
    return;
  }

  llvm_unreachable("unimplemented reg-to-reg copy");
}

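// The spill helper below picks a store opcode from the register class's
// size in bytes alone. For example: a 4-byte GPR32 spill uses STRWui, a
// 16-byte FPR128 spill uses STRQui, and the D/Q tuple classes use ST1
// multi-vector stores (ST1Twov1d and friends), which cannot encode an
// immediate offset; the Offset flag tracks whether a #0 operand is added.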
void AArch64InstrInfo::storeRegToStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
    bool isKill, int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI));
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
  unsigned Opc = 0;
  bool Offset = true;
  switch (RC->getSize()) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRWui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
      else
        assert(SrcReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::STRXui;
      if (TargetRegisterInfo::isVirtualRegister(SrcReg))
        MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      else
        assert(SrcReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRDui;
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::STRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov1d, Offset = false;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev1d, Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv1d, Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Twov2d, Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Threev2d, Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
      Opc = AArch64::ST1Fourv2d, Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                     .addReg(SrcReg, getKillRegState(isKill))
                                     .addFrameIndex(FI);

  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}

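// loadRegFromStackSlot mirrors the opcode table in storeRegToStackSlot,
// substituting the matching LDR*/LD1* opcodes; the Offset flag again
// marks whether the instruction takes an immediate offset operand.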
void AArch64InstrInfo::loadRegFromStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
    int FI, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI));
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);

  unsigned Opc = 0;
  bool Offset = true;
  switch (RC->getSize()) {
  case 1:
    if (AArch64::FPR8RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRBui;
    break;
  case 2:
    if (AArch64::FPR16RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRHui;
    break;
  case 4:
    if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRWui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
      else
        assert(DestReg != AArch64::WSP);
    } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRSui;
    break;
  case 8:
    if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
      Opc = AArch64::LDRXui;
      if (TargetRegisterInfo::isVirtualRegister(DestReg))
        MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
      else
        assert(DestReg != AArch64::SP);
    } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRDui;
    break;
  case 16:
    if (AArch64::FPR128RegClass.hasSubClassEq(RC))
      Opc = AArch64::LDRQui;
    else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov1d, Offset = false;
    }
    break;
  case 24:
    if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev1d, Offset = false;
    }
    break;
  case 32:
    if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv1d, Offset = false;
    } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Twov2d, Offset = false;
    }
    break;
  case 48:
    if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Threev2d, Offset = false;
    }
    break;
  case 64:
    if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
      Opc = AArch64::LD1Fourv2d, Offset = false;
    }
    break;
  }
  assert(Opc && "Unknown register class");

  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
                                     .addReg(DestReg, getDefRegState(true))
                                     .addFrameIndex(FI);
  if (Offset)
    MI.addImm(0);
  MI.addMemOperand(MMO);
}

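// emitFrameOffset materializes DestReg = SrcReg +/- Offset using ADD/SUB
// immediates, each of which encodes 12 bits optionally shifted left by 12.
// Larger offsets are split into shifted chunks. For example, Offset =
// 0x12345 would be emitted roughly as:
//   ADD DestReg, SrcReg,  #0x12, lsl #12  // covers 0x12000
//   ADD DestReg, DestReg, #0x345          // remainder < (1 << 12)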
void llvm::emitFrameOffset(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg, int Offset,
                           const TargetInstrInfo *TII,
                           MachineInstr::MIFlag Flag, bool SetNZCV) {
  if (DestReg == SrcReg && Offset == 0)
    return;

  bool isSub = Offset < 0;
  if (isSub)
    Offset = -Offset;

  // FIXME: If the offset won't fit in 24 bits, compute the offset into a
  // scratch register. If DestReg is a virtual register, use it as the
  // scratch register; otherwise, create a new virtual register (to be
  // replaced by the scavenger at the end of PEI). That case can be optimized
  // slightly if DestReg is SP, which is always 16-byte aligned, so the
  // scratch register can be loaded with Offset % 8 and the add/sub can use
  // an extending instruction with LSL #3.
  // Currently the function handles any offset but generates a poor sequence
  // of code.
  // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");

  unsigned Opc;
  if (SetNZCV)
    Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
  else
    Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
  const unsigned MaxEncoding = 0xfff;
  const unsigned ShiftSize = 12;
  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
  while (((unsigned)Offset) >= (1 << ShiftSize)) {
    unsigned ThisVal;
    if (((unsigned)Offset) > MaxEncodableValue) {
      ThisVal = MaxEncodableValue;
    } else {
      ThisVal = Offset & MaxEncodableValue;
    }
    assert((ThisVal >> ShiftSize) <= MaxEncoding &&
           "Encoding cannot handle value that big");
    BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
        .addReg(SrcReg)
        .addImm(ThisVal >> ShiftSize)
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
        .setMIFlag(Flag);

    SrcReg = DestReg;
    Offset -= ThisVal;
    if (Offset == 0)
      return;
  }
  BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
      .addReg(SrcReg)
      .addImm(Offset)
      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
      .setMIFlag(Flag);
}

MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                                      MachineInstr *MI,
                                                      ArrayRef<unsigned> Ops,
                                                      int FrameIndex) const {
  // This is a bit of a hack. Consider this instruction:
  //
  //   %vreg0<def> = COPY %SP; GPR64all:%vreg0
  //
  // We explicitly chose GPR64all for the virtual register so such a copy might
  // be eliminated by RegisterCoalescer. However, that may not be possible, and
  // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
  //
  // To prevent that, we are going to constrain the %vreg0 register class here.
  //
  // <rdar://problem/11522048>
  //
  if (MI->isCopy()) {
    unsigned DstReg = MI->getOperand(0).getReg();
    unsigned SrcReg = MI->getOperand(1).getReg();
    if (SrcReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(DstReg)) {
      MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
    if (DstReg == AArch64::SP &&
        TargetRegisterInfo::isVirtualRegister(SrcReg)) {
      MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
      return nullptr;
    }
  }

  // Cannot fold.
  return nullptr;
}

int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
                                    bool *OutUseUnscaledOp,
                                    unsigned *OutUnscaledOp,
                                    int *EmittableOffset) {
  int Scale = 1;
  bool IsSigned = false;
  // The ImmIdx should be changed case by case if it is not 2.
  unsigned ImmIdx = 2;
  unsigned UnscaledOp = 0;
  // Set output values in case of early exit.
  if (EmittableOffset)
    *EmittableOffset = 0;
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = false;
  if (OutUnscaledOp)
    *OutUnscaledOp = 0;
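  // The switch below records each opcode's access size (Scale) and, where
  // one exists, the equivalent unscaled-offset opcode. For example, LDRXui
  // scales its unsigned 12-bit immediate by 8, covering byte offsets 0 to
  // 32760 in steps of 8; an offset that is unaligned or negative instead
  // falls back to LDURXi, whose signed 9-bit immediate covers -256 to 255.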
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
  // Vector spills/fills can't take an immediate offset.
  case AArch64::LD1Twov2d:
  case AArch64::LD1Threev2d:
  case AArch64::LD1Fourv2d:
  case AArch64::LD1Twov1d:
  case AArch64::LD1Threev1d:
  case AArch64::LD1Fourv1d:
  case AArch64::ST1Twov2d:
  case AArch64::ST1Threev2d:
  case AArch64::ST1Fourv2d:
  case AArch64::ST1Twov1d:
  case AArch64::ST1Threev1d:
  case AArch64::ST1Fourv1d:
    return AArch64FrameOffsetCannotUpdate;
  case AArch64::PRFMui:
    Scale = 8;
    UnscaledOp = AArch64::PRFUMi;
    break;
  case AArch64::LDRXui:
    Scale = 8;
    UnscaledOp = AArch64::LDURXi;
    break;
  case AArch64::LDRWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURWi;
    break;
  case AArch64::LDRBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBi;
    break;
  case AArch64::LDRHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHi;
    break;
  case AArch64::LDRSui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSi;
    break;
  case AArch64::LDRDui:
    Scale = 8;
    UnscaledOp = AArch64::LDURDi;
    break;
  case AArch64::LDRQui:
    Scale = 16;
    UnscaledOp = AArch64::LDURQi;
    break;
  case AArch64::LDRBBui:
    Scale = 1;
    UnscaledOp = AArch64::LDURBBi;
    break;
  case AArch64::LDRHHui:
    Scale = 2;
    UnscaledOp = AArch64::LDURHHi;
    break;
  case AArch64::LDRSBXui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBXi;
    break;
  case AArch64::LDRSBWui:
    Scale = 1;
    UnscaledOp = AArch64::LDURSBWi;
    break;
  case AArch64::LDRSHXui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHXi;
    break;
  case AArch64::LDRSHWui:
    Scale = 2;
    UnscaledOp = AArch64::LDURSHWi;
    break;
  case AArch64::LDRSWui:
    Scale = 4;
    UnscaledOp = AArch64::LDURSWi;
    break;

  case AArch64::STRXui:
    Scale = 8;
    UnscaledOp = AArch64::STURXi;
    break;
  case AArch64::STRWui:
    Scale = 4;
    UnscaledOp = AArch64::STURWi;
    break;
  case AArch64::STRBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBi;
    break;
  case AArch64::STRHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHi;
    break;
  case AArch64::STRSui:
    Scale = 4;
    UnscaledOp = AArch64::STURSi;
    break;
  case AArch64::STRDui:
    Scale = 8;
    UnscaledOp = AArch64::STURDi;
    break;
  case AArch64::STRQui:
    Scale = 16;
    UnscaledOp = AArch64::STURQi;
    break;
  case AArch64::STRBBui:
    Scale = 1;
    UnscaledOp = AArch64::STURBBi;
    break;
  case AArch64::STRHHui:
    Scale = 2;
    UnscaledOp = AArch64::STURHHi;
    break;

  case AArch64::LDPXi:
  case AArch64::LDPDi:
  case AArch64::STPXi:
  case AArch64::STPDi:
    IsSigned = true;
    Scale = 8;
    break;
  case AArch64::LDPQi:
  case AArch64::STPQi:
    IsSigned = true;
    Scale = 16;
    break;
  case AArch64::LDPWi:
  case AArch64::LDPSi:
  case AArch64::STPWi:
  case AArch64::STPSi:
    IsSigned = true;
    Scale = 4;
    break;

  case AArch64::LDURXi:
  case AArch64::LDURWi:
  case AArch64::LDURBi:
  case AArch64::LDURHi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURHHi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::LDURSWi:
  case AArch64::STURXi:
  case AArch64::STURWi:
  case AArch64::STURBi:
  case AArch64::STURHi:
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURBBi:
  case AArch64::STURHHi:
    Scale = 1;
    break;
  }

  Offset += MI.getOperand(ImmIdx).getImm() * Scale;

  bool useUnscaledOp = false;
  // If the offset doesn't match the scale, we rewrite the instruction to
  // use the unscaled instruction instead. Likewise, if we have a negative
  // offset (and have an unscaled op to use).
  if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
    useUnscaledOp = true;

  // Use an unscaled addressing mode if the instruction has a negative offset
  // (or if the instruction is already using an unscaled addressing mode).
  unsigned MaskBits;
  if (IsSigned) {
    // ldp/stp instructions.
    MaskBits = 7;
    Offset /= Scale;
  } else if (UnscaledOp == 0 || useUnscaledOp) {
    MaskBits = 9;
    IsSigned = true;
    Scale = 1;
  } else {
    MaskBits = 12;
    IsSigned = false;
    Offset /= Scale;
  }

  // Attempt to fold address computation.
  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
  int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
  if (Offset >= MinOff && Offset <= MaxOff) {
    if (EmittableOffset)
      *EmittableOffset = Offset;
    Offset = 0;
  } else {
    int NewOff = Offset < 0 ? MinOff : MaxOff;
    if (EmittableOffset)
      *EmittableOffset = NewOff;
    Offset = (Offset - NewOff) * Scale;
  }
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = useUnscaledOp;
  if (OutUnscaledOp)
    *OutUnscaledOp = UnscaledOp;
  return AArch64FrameOffsetCanUpdate |
         (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
}

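// A worked example of the folding above, assuming LDRXui (Scale = 8) and
// a residual byte offset of 65536: the scaled offset 65536 / 8 = 8192
// exceeds MaxOff = 4095, so *EmittableOffset is clamped to 4095 and
// Offset becomes (8192 - 4095) * 8 = 32776 bytes, which the caller must
// materialize separately (e.g. via emitFrameOffset).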
bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                    unsigned FrameReg, int &Offset,
                                    const AArch64InstrInfo *TII) {
  unsigned Opcode = MI.getOpcode();
  unsigned ImmIdx = FrameRegIdx + 1;

  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    Offset += MI.getOperand(ImmIdx).getImm();
    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    MI.eraseFromParent();
    Offset = 0;
    return true;
  }

  int NewOffset;
  unsigned UnscaledOp;
  bool UseUnscaledOp;
  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                         &UnscaledOp, &NewOffset);
  if (Status & AArch64FrameOffsetCanUpdate) {
    if (Status & AArch64FrameOffsetIsLegal)
      // Replace the FrameIndex with FrameReg.
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    if (UseUnscaledOp)
      MI.setDesc(TII->get(UnscaledOp));

    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    return Offset == 0;
  }

  return false;
}

void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
  NopInst.setOpcode(AArch64::HINT);
  NopInst.addOperand(MCOperand::CreateImm(0));
}

/// useMachineCombiner - return true when a target supports MachineCombiner.
bool AArch64InstrInfo::useMachineCombiner() const {
  // AArch64 supports the MachineCombiner.
  return true;
}

// True when Opc sets flags.
static bool isCombineInstrSettingFlag(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd = WnxWm - Wi.
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// 32b opcodes that can be combined with a MUL.
static bool isCombineInstrCandidate32(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDWrr:
  case AArch64::ADDWri:
  case AArch64::SUBWrr:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::SUBSWrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd = WnxWm - Wi.
  case AArch64::SUBWri:
  case AArch64::SUBSWri:
    return true;
  default:
    break;
  }
  return false;
}

// 64b opcodes that can be combined with a MUL.
static bool isCombineInstrCandidate64(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDXrr:
  case AArch64::ADDXri:
  case AArch64::SUBXrr:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd = WnxWm - Wi.
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}

// Opcodes that can be combined with a MUL.
static bool isCombineInstrCandidate(unsigned Opc) {
  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
}

static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                              unsigned MulOpc, unsigned ZeroReg) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *MI = nullptr;
  // We need a virtual register definition.
  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    MI = MRI.getUniqueVRegDef(MO.getReg());
  // And it needs to be in the trace (otherwise, it won't have a depth).
  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != MulOpc)
    return false;

  assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
         MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
         MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");

  // The third input reg must be zero.
  if (MI->getOperand(3).getReg() != ZeroReg)
    return false;

  // It must only be used by the instruction we combine with.
  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    return false;

  return true;
}

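// Putting the helpers above together, a typical combinable chain looks
// like (illustrative):
//   %1 = MADDWrrr %a, %b, %wzr   ; a plain multiply (accumulator is WZR)
//   %2 = ADDWrr %1, %c           ; %1 has a single non-debug use
// which hasPattern() below reports as MC_MULADDW_OP1.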
/// hasPattern - return true when there is potentially a faster code sequence
/// for an instruction chain ending in \p Root. All potential patterns are
/// listed in the \p Pattern vector. Pattern should be sorted in priority
/// order since the pattern evaluator stops checking as soon as it finds a
/// faster sequence.
bool AArch64InstrInfo::hasPattern(
    MachineInstr &Root,
    SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern) const {
  unsigned Opc = Root.getOpcode();
  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  if (!isCombineInstrCandidate(Opc))
    return false;
  if (isCombineInstrSettingFlag(Opc)) {
    int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
    // Bail out when NZCV is live.
    if (Cmp_NZCV == -1)
      return false;
    unsigned NewOpc = convertFlagSettingOpcode(&Root);
    // Bail out when the opcode can't be changed.
    // CHECKME: do we miss any cases for opcode conversion?
    if (NewOpc == Opc)
      return false;
    Opc = NewOpc;
  }

  switch (Opc) {
  default:
    break;
  case AArch64::ADDWrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "ADDWrr does not have register operands");
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Pattern.push_back(MachineCombinerPattern::MC_MULADDW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Pattern.push_back(MachineCombinerPattern::MC_MULADDW_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Pattern.push_back(MachineCombinerPattern::MC_MULADDX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Pattern.push_back(MachineCombinerPattern::MC_MULADDX_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBWrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Pattern.push_back(MachineCombinerPattern::MC_MULSUBW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Pattern.push_back(MachineCombinerPattern::MC_MULSUBW_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Pattern.push_back(MachineCombinerPattern::MC_MULSUBX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Pattern.push_back(MachineCombinerPattern::MC_MULSUBX_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Pattern.push_back(MachineCombinerPattern::MC_MULADDWI_OP1);
      Found = true;
    }
    break;
  case AArch64::ADDXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Pattern.push_back(MachineCombinerPattern::MC_MULADDXI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Pattern.push_back(MachineCombinerPattern::MC_MULSUBWI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Pattern.push_back(MachineCombinerPattern::MC_MULSUBXI_OP1);
      Found = true;
    }
    break;
  }
  return Found;
}

/// genMadd - Generate madd instruction and combine mul and add.
/// Example:
///  MUL I=A,B,0
///  ADD R,I,C
///  ==> MADD R,A,B,C
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
static MachineInstr *genMadd(MachineFunction &MF, MachineRegisterInfo &MRI,
                             const TargetInstrInfo *TII, MachineInstr &Root,
                             SmallVectorImpl<MachineInstr *> &InsInstrs,
                             unsigned IdxMulOpd, unsigned MaddOpc,
                             const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();
  unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
  bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
    MRI.constrainRegClass(SrcReg2, RC);

  MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
                                    ResultReg)
                                .addReg(SrcReg0, getKillRegState(Src0IsKill))
                                .addReg(SrcReg1, getKillRegState(Src1IsKill))
                                .addReg(SrcReg2, getKillRegState(Src2IsKill));
  // Insert the MADD.
  InsInstrs.push_back(MIB);
  return MUL;
}

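// Both generators return the MUL they folded so that
// genAlternativeCodeSequence below can record it, together with the Root
// instruction, in the list of instructions to delete once the MADD/MSUB
// has been inserted.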
/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register.
/// Example - an ADD intermediate needs to be stored in a register:
///  MUL I=A,B,0
///  ADD R,I,Imm
///  ==> ORR  V, ZR, Imm
///  ==> MADD R,A,B,V
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
                              const TargetInstrInfo *TII, MachineInstr &Root,
                              SmallVectorImpl<MachineInstr *> &InsInstrs,
                              unsigned IdxMulOpd, unsigned MaddOpc,
                              unsigned VR, const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(VR))
    MRI.constrainRegClass(VR, RC);

  MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
                                    ResultReg)
                                .addReg(SrcReg0, getKillRegState(Src0IsKill))
                                .addReg(SrcReg1, getKillRegState(Src1IsKill))
                                .addReg(VR);
  // Insert the MADD.
  InsInstrs.push_back(MIB);
  return MUL;
}

/// genAlternativeCodeSequence - when hasPattern() finds a pattern,
/// this function generates the instructions that could replace the
/// original code sequence.
void AArch64InstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  MachineBasicBlock &MBB = *Root.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  // MUL stays null when no replacement sequence is generated (e.g. an
  // immediate that cannot be encoded); nothing is deleted in that case.
  MachineInstr *MUL = nullptr;
  const TargetRegisterClass *RC;
  unsigned Opc;
  switch (Pattern) {
  default:
    // Signal error.
    break;
  case MachineCombinerPattern::MC_MULADDW_OP1:
  case MachineCombinerPattern::MC_MULADDX_OP1:
    // MUL I=A,B,0
    // ADD R,I,C
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MC_MULADDW_OP1) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MC_MULADDW_OP2:
  case MachineCombinerPattern::MC_MULADDX_OP2:
    // MUL I=A,B,0
    // ADD R,C,I
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MC_MULADDW_OP2) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MC_MULADDWI_OP1:
  case MachineCombinerPattern::MC_MULADDXI_OP1: {
    // MUL I=A,B,0
    // ADD R,I,Imm
    // ==> ORR  V, ZR, Imm
    // ==> MADD R,A,B,V
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MC_MULADDWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();

    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  case MachineCombinerPattern::MC_MULSUBW_OP1:
  case MachineCombinerPattern::MC_MULSUBX_OP1: {
    // MUL I=A,B,0
    // SUB R,I, C
    // ==> SUB  V, 0, C
    // ==> MADD R,A,B,V // = -C + A*B
    // --- Create(MADD);
    const TargetRegisterClass *SubRC;
    unsigned SubOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP1) {
      SubOpc = AArch64::SUBWrr;
      SubRC = &AArch64::GPR32spRegClass;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      SubOpc = AArch64::SUBXrr;
      SubRC = &AArch64::GPR64spRegClass;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(SubRC);
    // SUB NewVR, 0, C
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
            .addReg(ZeroReg)
            .addOperand(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
  case MachineCombinerPattern::MC_MULSUBW_OP2:
  case MachineCombinerPattern::MC_MULSUBX_OP2:
    // MUL I=A,B,0
    // SUB R,C,I
    // ==> MSUB R,A,B,C (computes C - A*B)
    // --- Create(MSUB);
    if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP2) {
      Opc = AArch64::MSUBWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MSUBXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MC_MULSUBWI_OP1:
  case MachineCombinerPattern::MC_MULSUBXI_OP1: {
    // MUL I=A,B,0
    // SUB R,I, Imm
    // ==> ORR  V, ZR, -Imm
    // ==> MADD R,A,B,V // = -Imm + A*B
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MC_MULSUBWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    int Imm = Root.getOperand(2).getImm();
    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    // Negate and widen to 64 bits before shifting; shifting the 32-bit
    // int directly would be undefined when BitSize is 32.
    uint64_t UImm = static_cast<uint64_t>(-Imm) << (64 - BitSize) >>
                    (64 - BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  } // end switch (Pattern)
  // Record the MUL and the ADD/SUB for deletion, but only when a
  // replacement sequence was actually generated.
  if (MUL) {
    DelInstrs.push_back(MUL);
    DelInstrs.push_back(&Root);
  }
}

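// The rewrite below is only performed when, roughly, all of the following
// hold (checked in order by the code): the branch tests a virtual register
// whose definition is a CSINC of the form csinc Rd, zr, zr, cc; for
// TBZ/TBNZ only bit 0 is tested; the CSINC must not additionally define
// NZCV; and no instruction between the CSINC and the branch modifies the
// condition code.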
/// \brief Replace a csinc-branch sequence by a simple conditional branch.
///
/// Examples:
/// 1.
///   csinc w9, wzr, wzr, <condition code>
///   tbnz  w9, #0, 0x44
/// to
///   b.<inverted condition code>
///
/// 2.
///   csinc w9, wzr, wzr, <condition code>
///   tbz   w9, #0, 0x44
/// to
///   b.<condition code>
///
/// \param MI Conditional Branch
/// \return True when the simple conditional branch is generated
///
bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const {
  bool IsNegativeBranch = false;
  bool IsTestAndBranch = false;
  unsigned TargetBBInMI = 0;
  switch (MI->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    return false;
  case AArch64::CBZW:
  case AArch64::CBZX:
    TargetBBInMI = 1;
    break;
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    TargetBBInMI = 1;
    IsNegativeBranch = true;
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
    TargetBBInMI = 2;
    IsTestAndBranch = true;
    break;
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    TargetBBInMI = 2;
    IsNegativeBranch = true;
    IsTestAndBranch = true;
    break;
  }
  // So we increment a zero register and test for bits other than bit 0?
  // Conservatively bail out in case the verifier missed this case.
  if (IsTestAndBranch && MI->getOperand(1).getImm())
    return false;

  // Find Definition.
  assert(MI->getParent() && "Incomplete machine instruction\n");
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  unsigned VReg = MI->getOperand(0).getReg();
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(VReg);

  // Look for CSINC.
  if (!(DefMI->getOpcode() == AArch64::CSINCWr &&
        DefMI->getOperand(1).getReg() == AArch64::WZR &&
        DefMI->getOperand(2).getReg() == AArch64::WZR) &&
      !(DefMI->getOpcode() == AArch64::CSINCXr &&
        DefMI->getOperand(1).getReg() == AArch64::XZR &&
        DefMI->getOperand(2).getReg() == AArch64::XZR))
    return false;

  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
    return false;

  AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
  bool CheckOnlyCCWrites = true;
  // Convert only when the condition code is not modified between
  // the CSINC and the branch. The CC may be used by other
  // instructions in between.
  if (modifiesConditionCode(DefMI, MI, CheckOnlyCCWrites, &getRegisterInfo()))
    return false;
  MachineBasicBlock &RefToMBB = *MBB;
  MachineBasicBlock *TBB = MI->getOperand(TargetBBInMI).getMBB();
  DebugLoc DL = MI->getDebugLoc();
  if (IsNegativeBranch)
    CC = AArch64CC::getInvertedCondCode(CC);
  BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
  MI->eraseFromParent();
  return true;
}