1 //===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the AArch64ConditionalCompares pass which reduces 11 // branching and code size by using the conditional compare instructions CCMP, 12 // CCMN, and FCMP. 13 // 14 // The CFG transformations for forming conditional compares are very similar to 15 // if-conversion, and this pass should run immediately before the early 16 // if-conversion pass. 17 // 18 //===----------------------------------------------------------------------===// 19 20 #include "AArch64.h" 21 #include "llvm/ADT/BitVector.h" 22 #include "llvm/ADT/DepthFirstIterator.h" 23 #include "llvm/ADT/SetVector.h" 24 #include "llvm/ADT/SmallPtrSet.h" 25 #include "llvm/ADT/SparseSet.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 28 #include "llvm/CodeGen/MachineDominators.h" 29 #include "llvm/CodeGen/MachineFunction.h" 30 #include "llvm/CodeGen/MachineFunctionPass.h" 31 #include "llvm/CodeGen/MachineInstrBuilder.h" 32 #include "llvm/CodeGen/MachineLoopInfo.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/MachineTraceMetrics.h" 35 #include "llvm/CodeGen/Passes.h" 36 #include "llvm/Support/CommandLine.h" 37 #include "llvm/Support/Debug.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include "llvm/Target/TargetInstrInfo.h" 40 #include "llvm/Target/TargetRegisterInfo.h" 41 #include "llvm/Target/TargetSubtargetInfo.h" 42 43 using namespace llvm; 44 45 #define DEBUG_TYPE "aarch64-ccmp" 46 47 // Absolute maximum number of instructions allowed per speculated block. 48 // This bypasses all other heuristics, so it should be set fairly high. 49 static cl::opt<unsigned> BlockInstrLimit( 50 "aarch64-ccmp-limit", cl::init(30), cl::Hidden, 51 cl::desc("Maximum number of instructions per speculated block.")); 52 53 // Stress testing mode - disable heuristics. 54 static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden, 55 cl::desc("Turn all knobs to 11")); 56 57 STATISTIC(NumConsidered, "Number of ccmps considered"); 58 STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)"); 59 STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)"); 60 STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)"); 61 STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)"); 62 STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)"); 63 STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)"); 64 STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)"); 65 STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)"); 66 STATISTIC(NumMultNZCVUses, "Number of ccmps rejected (NZCV used)"); 67 STATISTIC(NumUnknNZCVDefs, "Number of ccmps rejected (NZCV def unknown)"); 68 69 STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)"); 70 71 STATISTIC(NumConverted, "Number of ccmp instructions created"); 72 STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted"); 73 74 //===----------------------------------------------------------------------===// 75 // SSACCmpConv 76 //===----------------------------------------------------------------------===// 77 // 78 // The SSACCmpConv class performs ccmp-conversion on SSA form machine code 79 // after determining if it is possible. The class contains no heuristics; 80 // external code should be used to determine when ccmp-conversion is a good 81 // idea. 82 // 83 // CCmp-formation works on a CFG representing chained conditions, typically 84 // from C's short-circuit || and && operators: 85 // 86 // From: Head To: Head 87 // / | CmpBB 88 // / | / | 89 // | CmpBB / | 90 // | / | Tail | 91 // | / | | | 92 // Tail | | | 93 // | | | | 94 // ... ... ... ... 95 // 96 // The Head block is terminated by a br.cond instruction, and the CmpBB block 97 // contains compare + br.cond. Tail must be a successor of both. 98 // 99 // The cmp-conversion turns the compare instruction in CmpBB into a conditional 100 // compare, and merges CmpBB into Head, speculatively executing its 101 // instructions. The AArch64 conditional compare instructions have an immediate 102 // operand that specifies the NZCV flag values when the condition is false and 103 // the compare isn't executed. This makes it possible to chain compares with 104 // different condition codes. 105 // 106 // Example: 107 // 108 // if (a == 5 || b == 17) 109 // foo(); 110 // 111 // Head: 112 // cmp w0, #5 113 // b.eq Tail 114 // CmpBB: 115 // cmp w1, #17 116 // b.eq Tail 117 // ... 118 // Tail: 119 // bl _foo 120 // 121 // Becomes: 122 // 123 // Head: 124 // cmp w0, #5 125 // ccmp w1, #17, 4, ne ; 4 = nZcv 126 // b.eq Tail 127 // ... 128 // Tail: 129 // bl _foo 130 // 131 // The ccmp condition code is the one that would cause the Head terminator to 132 // branch to CmpBB. 133 // 134 // FIXME: It should also be possible to speculate a block on the critical edge 135 // between Head and Tail, just like if-converting a diamond. 136 // 137 // FIXME: Handle PHIs in Tail by turning them into selects (if-conversion). 138 139 namespace { 140 class SSACCmpConv { 141 MachineFunction *MF; 142 const TargetInstrInfo *TII; 143 const TargetRegisterInfo *TRI; 144 MachineRegisterInfo *MRI; 145 146 public: 147 /// The first block containing a conditional branch, dominating everything 148 /// else. 149 MachineBasicBlock *Head; 150 151 /// The block containing cmp+br.cond with a successor shared with Head. 152 MachineBasicBlock *CmpBB; 153 154 /// The common successor for Head and CmpBB. 155 MachineBasicBlock *Tail; 156 157 /// The compare instruction in CmpBB that can be converted to a ccmp. 158 MachineInstr *CmpMI; 159 160 private: 161 /// The branch condition in Head as determined by AnalyzeBranch. 162 SmallVector<MachineOperand, 4> HeadCond; 163 164 /// The condition code that makes Head branch to CmpBB. 165 AArch64CC::CondCode HeadCmpBBCC; 166 167 /// The branch condition in CmpBB. 168 SmallVector<MachineOperand, 4> CmpBBCond; 169 170 /// The condition code that makes CmpBB branch to Tail. 171 AArch64CC::CondCode CmpBBTailCC; 172 173 /// Check if the Tail PHIs are trivially convertible. 174 bool trivialTailPHIs(); 175 176 /// Remove CmpBB from the Tail PHIs. 177 void updateTailPHIs(); 178 179 /// Check if an operand defining DstReg is dead. 180 bool isDeadDef(unsigned DstReg); 181 182 /// Find the compare instruction in MBB that controls the conditional branch. 183 /// Return NULL if a convertible instruction can't be found. 184 MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB); 185 186 /// Return true if all non-terminator instructions in MBB can be safely 187 /// speculated. 188 bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI); 189 190 public: 191 /// runOnMachineFunction - Initialize per-function data structures. 192 void runOnMachineFunction(MachineFunction &MF) { 193 this->MF = &MF; 194 TII = MF.getSubtarget().getInstrInfo(); 195 TRI = MF.getSubtarget().getRegisterInfo(); 196 MRI = &MF.getRegInfo(); 197 } 198 199 /// If the sub-CFG headed by MBB can be cmp-converted, initialize the 200 /// internal state, and return true. 201 bool canConvert(MachineBasicBlock *MBB); 202 203 /// Cmo-convert the last block passed to canConvertCmp(), assuming 204 /// it is possible. Add any erased blocks to RemovedBlocks. 205 void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks); 206 207 /// Return the expected code size delta if the conversion into a 208 /// conditional compare is performed. 209 int expectedCodeSizeDelta() const; 210 }; 211 } // end anonymous namespace 212 213 // Check that all PHIs in Tail are selecting the same value from Head and CmpBB. 214 // This means that no if-conversion is required when merging CmpBB into Head. 215 bool SSACCmpConv::trivialTailPHIs() { 216 for (auto &I : *Tail) { 217 if (!I.isPHI()) 218 break; 219 unsigned HeadReg = 0, CmpBBReg = 0; 220 // PHI operands come in (VReg, MBB) pairs. 221 for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) { 222 MachineBasicBlock *MBB = I.getOperand(oi + 1).getMBB(); 223 unsigned Reg = I.getOperand(oi).getReg(); 224 if (MBB == Head) { 225 assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands"); 226 HeadReg = Reg; 227 } 228 if (MBB == CmpBB) { 229 assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands"); 230 CmpBBReg = Reg; 231 } 232 } 233 if (HeadReg != CmpBBReg) 234 return false; 235 } 236 return true; 237 } 238 239 // Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply 240 // removing the CmpBB operands. The Head operands will be identical. 241 void SSACCmpConv::updateTailPHIs() { 242 for (auto &I : *Tail) { 243 if (!I.isPHI()) 244 break; 245 // I is a PHI. It can have multiple entries for CmpBB. 246 for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) { 247 // PHI operands are (Reg, MBB) at (oi-2, oi-1). 248 if (I.getOperand(oi - 1).getMBB() == CmpBB) { 249 I.RemoveOperand(oi - 1); 250 I.RemoveOperand(oi - 2); 251 } 252 } 253 } 254 } 255 256 // This pass runs before the AArch64DeadRegisterDefinitions pass, so compares 257 // are still writing virtual registers without any uses. 258 bool SSACCmpConv::isDeadDef(unsigned DstReg) { 259 // Writes to the zero register are dead. 260 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR) 261 return true; 262 if (!TargetRegisterInfo::isVirtualRegister(DstReg)) 263 return false; 264 // A virtual register def without any uses will be marked dead later, and 265 // eventually replaced by the zero register. 266 return MRI->use_nodbg_empty(DstReg); 267 } 268 269 // Parse a condition code returned by AnalyzeBranch, and compute the CondCode 270 // corresponding to TBB. 271 // Return 272 static bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) { 273 // A normal br.cond simply has the condition code. 274 if (Cond[0].getImm() != -1) { 275 assert(Cond.size() == 1 && "Unknown Cond array format"); 276 CC = (AArch64CC::CondCode)(int)Cond[0].getImm(); 277 return true; 278 } 279 // For tbz and cbz instruction, the opcode is next. 280 switch (Cond[1].getImm()) { 281 default: 282 // This includes tbz / tbnz branches which can't be converted to 283 // ccmp + br.cond. 284 return false; 285 case AArch64::CBZW: 286 case AArch64::CBZX: 287 assert(Cond.size() == 3 && "Unknown Cond array format"); 288 CC = AArch64CC::EQ; 289 return true; 290 case AArch64::CBNZW: 291 case AArch64::CBNZX: 292 assert(Cond.size() == 3 && "Unknown Cond array format"); 293 CC = AArch64CC::NE; 294 return true; 295 } 296 } 297 298 MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) { 299 MachineBasicBlock::iterator I = MBB->getFirstTerminator(); 300 if (I == MBB->end()) 301 return nullptr; 302 // The terminator must be controlled by the flags. 303 if (!I->readsRegister(AArch64::NZCV)) { 304 switch (I->getOpcode()) { 305 case AArch64::CBZW: 306 case AArch64::CBZX: 307 case AArch64::CBNZW: 308 case AArch64::CBNZX: 309 // These can be converted into a ccmp against #0. 310 return I; 311 } 312 ++NumCmpTermRejs; 313 DEBUG(dbgs() << "Flags not used by terminator: " << *I); 314 return nullptr; 315 } 316 317 // Now find the instruction controlling the terminator. 318 for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) { 319 --I; 320 assert(!I->isTerminator() && "Spurious terminator"); 321 switch (I->getOpcode()) { 322 // cmp is an alias for subs with a dead destination register. 323 case AArch64::SUBSWri: 324 case AArch64::SUBSXri: 325 // cmn is an alias for adds with a dead destination register. 326 case AArch64::ADDSWri: 327 case AArch64::ADDSXri: 328 // Check that the immediate operand is within range, ccmp wants a uimm5. 329 // Rd = SUBSri Rn, imm, shift 330 if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) { 331 DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I); 332 ++NumImmRangeRejs; 333 return nullptr; 334 } 335 // Fall through. 336 case AArch64::SUBSWrr: 337 case AArch64::SUBSXrr: 338 case AArch64::ADDSWrr: 339 case AArch64::ADDSXrr: 340 if (isDeadDef(I->getOperand(0).getReg())) 341 return I; 342 DEBUG(dbgs() << "Can't convert compare with live destination: " << *I); 343 ++NumLiveDstRejs; 344 return nullptr; 345 case AArch64::FCMPSrr: 346 case AArch64::FCMPDrr: 347 case AArch64::FCMPESrr: 348 case AArch64::FCMPEDrr: 349 return I; 350 } 351 352 // Check for flag reads and clobbers. 353 MIOperands::PhysRegInfo PRI = 354 MIOperands(I).analyzePhysReg(AArch64::NZCV, TRI); 355 356 if (PRI.Reads) { 357 // The ccmp doesn't produce exactly the same flags as the original 358 // compare, so reject the transform if there are uses of the flags 359 // besides the terminators. 360 DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I); 361 ++NumMultNZCVUses; 362 return nullptr; 363 } 364 365 if (PRI.Clobbers) { 366 DEBUG(dbgs() << "Not convertible compare: " << *I); 367 ++NumUnknNZCVDefs; 368 return nullptr; 369 } 370 } 371 DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n'); 372 return nullptr; 373 } 374 375 /// Determine if all the instructions in MBB can safely 376 /// be speculated. The terminators are not considered. 377 /// 378 /// Only CmpMI is allowed to clobber the flags. 379 /// 380 bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB, 381 const MachineInstr *CmpMI) { 382 // Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to 383 // get right. 384 if (!MBB->livein_empty()) { 385 DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n"); 386 return false; 387 } 388 389 unsigned InstrCount = 0; 390 391 // Check all instructions, except the terminators. It is assumed that 392 // terminators never have side effects or define any used register values. 393 for (auto &I : make_range(MBB->begin(), MBB->getFirstTerminator())) { 394 if (I.isDebugValue()) 395 continue; 396 397 if (++InstrCount > BlockInstrLimit && !Stress) { 398 DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than " 399 << BlockInstrLimit << " instructions.\n"); 400 return false; 401 } 402 403 // There shouldn't normally be any phis in a single-predecessor block. 404 if (I.isPHI()) { 405 DEBUG(dbgs() << "Can't hoist: " << I); 406 return false; 407 } 408 409 // Don't speculate loads. Note that it may be possible and desirable to 410 // speculate GOT or constant pool loads that are guaranteed not to trap, 411 // but we don't support that for now. 412 if (I.mayLoad()) { 413 DEBUG(dbgs() << "Won't speculate load: " << I); 414 return false; 415 } 416 417 // We never speculate stores, so an AA pointer isn't necessary. 418 bool DontMoveAcrossStore = true; 419 if (!I.isSafeToMove(TII, nullptr, DontMoveAcrossStore)) { 420 DEBUG(dbgs() << "Can't speculate: " << I); 421 return false; 422 } 423 424 // Only CmpMI is allowed to clobber the flags. 425 if (&I != CmpMI && I.modifiesRegister(AArch64::NZCV, TRI)) { 426 DEBUG(dbgs() << "Clobbers flags: " << I); 427 return false; 428 } 429 } 430 return true; 431 } 432 433 /// Analyze the sub-cfg rooted in MBB, and return true if it is a potential 434 /// candidate for cmp-conversion. Fill out the internal state. 435 /// 436 bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) { 437 Head = MBB; 438 Tail = CmpBB = nullptr; 439 440 if (Head->succ_size() != 2) 441 return false; 442 MachineBasicBlock *Succ0 = Head->succ_begin()[0]; 443 MachineBasicBlock *Succ1 = Head->succ_begin()[1]; 444 445 // CmpBB can only have a single predecessor. Tail is allowed many. 446 if (Succ0->pred_size() != 1) 447 std::swap(Succ0, Succ1); 448 449 // Succ0 is our candidate for CmpBB. 450 if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2) 451 return false; 452 453 CmpBB = Succ0; 454 Tail = Succ1; 455 456 if (!CmpBB->isSuccessor(Tail)) 457 return false; 458 459 // The CFG topology checks out. 460 DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber() << " -> BB#" 461 << CmpBB->getNumber() << " -> BB#" << Tail->getNumber() << '\n'); 462 ++NumConsidered; 463 464 // Tail is allowed to have many predecessors, but we can't handle PHIs yet. 465 // 466 // FIXME: Real PHIs could be if-converted as long as the CmpBB values are 467 // defined before The CmpBB cmp clobbers the flags. Alternatively, it should 468 // always be safe to sink the ccmp down to immediately before the CmpBB 469 // terminators. 470 if (!trivialTailPHIs()) { 471 DEBUG(dbgs() << "Can't handle phis in Tail.\n"); 472 ++NumPhiRejs; 473 return false; 474 } 475 476 if (!Tail->livein_empty()) { 477 DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n"); 478 ++NumPhysRejs; 479 return false; 480 } 481 482 // CmpBB should never have PHIs since Head is its only predecessor. 483 // FIXME: Clean them up if it happens. 484 if (!CmpBB->empty() && CmpBB->front().isPHI()) { 485 DEBUG(dbgs() << "Can't handle phis in CmpBB.\n"); 486 ++NumPhi2Rejs; 487 return false; 488 } 489 490 if (!CmpBB->livein_empty()) { 491 DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n"); 492 ++NumPhysRejs; 493 return false; 494 } 495 496 // The branch we're looking to eliminate must be analyzable. 497 HeadCond.clear(); 498 MachineBasicBlock *TBB = nullptr, *FBB = nullptr; 499 if (TII->AnalyzeBranch(*Head, TBB, FBB, HeadCond)) { 500 DEBUG(dbgs() << "Head branch not analyzable.\n"); 501 ++NumHeadBranchRejs; 502 return false; 503 } 504 505 // This is weird, probably some sort of degenerate CFG, or an edge to a 506 // landing pad. 507 if (!TBB || HeadCond.empty()) { 508 DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in Head.\n"); 509 ++NumHeadBranchRejs; 510 return false; 511 } 512 513 if (!parseCond(HeadCond, HeadCmpBBCC)) { 514 DEBUG(dbgs() << "Unsupported branch type on Head\n"); 515 ++NumHeadBranchRejs; 516 return false; 517 } 518 519 // Make sure the branch direction is right. 520 if (TBB != CmpBB) { 521 assert(TBB == Tail && "Unexpected TBB"); 522 HeadCmpBBCC = AArch64CC::getInvertedCondCode(HeadCmpBBCC); 523 } 524 525 CmpBBCond.clear(); 526 TBB = FBB = nullptr; 527 if (TII->AnalyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) { 528 DEBUG(dbgs() << "CmpBB branch not analyzable.\n"); 529 ++NumCmpBranchRejs; 530 return false; 531 } 532 533 if (!TBB || CmpBBCond.empty()) { 534 DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in CmpBB.\n"); 535 ++NumCmpBranchRejs; 536 return false; 537 } 538 539 if (!parseCond(CmpBBCond, CmpBBTailCC)) { 540 DEBUG(dbgs() << "Unsupported branch type on CmpBB\n"); 541 ++NumCmpBranchRejs; 542 return false; 543 } 544 545 if (TBB != Tail) 546 CmpBBTailCC = AArch64CC::getInvertedCondCode(CmpBBTailCC); 547 548 DEBUG(dbgs() << "Head->CmpBB on " << AArch64CC::getCondCodeName(HeadCmpBBCC) 549 << ", CmpBB->Tail on " << AArch64CC::getCondCodeName(CmpBBTailCC) 550 << '\n'); 551 552 CmpMI = findConvertibleCompare(CmpBB); 553 if (!CmpMI) 554 return false; 555 556 if (!canSpeculateInstrs(CmpBB, CmpMI)) { 557 ++NumSpeculateRejs; 558 return false; 559 } 560 return true; 561 } 562 563 void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) { 564 DEBUG(dbgs() << "Merging BB#" << CmpBB->getNumber() << " into BB#" 565 << Head->getNumber() << ":\n" << *CmpBB); 566 567 // All CmpBB instructions are moved into Head, and CmpBB is deleted. 568 // Update the CFG first. 569 updateTailPHIs(); 570 Head->removeSuccessor(CmpBB); 571 CmpBB->removeSuccessor(Tail); 572 Head->transferSuccessorsAndUpdatePHIs(CmpBB); 573 DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc(); 574 TII->RemoveBranch(*Head); 575 576 // If the Head terminator was one of the cbz / tbz branches with built-in 577 // compare, we need to insert an explicit compare instruction in its place. 578 if (HeadCond[0].getImm() == -1) { 579 ++NumCompBranches; 580 unsigned Opc = 0; 581 switch (HeadCond[1].getImm()) { 582 case AArch64::CBZW: 583 case AArch64::CBNZW: 584 Opc = AArch64::SUBSWri; 585 break; 586 case AArch64::CBZX: 587 case AArch64::CBNZX: 588 Opc = AArch64::SUBSXri; 589 break; 590 default: 591 llvm_unreachable("Cannot convert Head branch"); 592 } 593 const MCInstrDesc &MCID = TII->get(Opc); 594 // Create a dummy virtual register for the SUBS def. 595 unsigned DestReg = 596 MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF)); 597 // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz. 598 BuildMI(*Head, Head->end(), TermDL, MCID) 599 .addReg(DestReg, RegState::Define | RegState::Dead) 600 .addOperand(HeadCond[2]) 601 .addImm(0) 602 .addImm(0); 603 // SUBS uses the GPR*sp register classes. 604 MRI->constrainRegClass(HeadCond[2].getReg(), 605 TII->getRegClass(MCID, 1, TRI, *MF)); 606 } 607 608 Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end()); 609 610 // Now replace CmpMI with a ccmp instruction that also considers the incoming 611 // flags. 612 unsigned Opc = 0; 613 unsigned FirstOp = 1; // First CmpMI operand to copy. 614 bool isZBranch = false; // CmpMI is a cbz/cbnz instruction. 615 switch (CmpMI->getOpcode()) { 616 default: 617 llvm_unreachable("Unknown compare opcode"); 618 case AArch64::SUBSWri: Opc = AArch64::CCMPWi; break; 619 case AArch64::SUBSWrr: Opc = AArch64::CCMPWr; break; 620 case AArch64::SUBSXri: Opc = AArch64::CCMPXi; break; 621 case AArch64::SUBSXrr: Opc = AArch64::CCMPXr; break; 622 case AArch64::ADDSWri: Opc = AArch64::CCMNWi; break; 623 case AArch64::ADDSWrr: Opc = AArch64::CCMNWr; break; 624 case AArch64::ADDSXri: Opc = AArch64::CCMNXi; break; 625 case AArch64::ADDSXrr: Opc = AArch64::CCMNXr; break; 626 case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0; break; 627 case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0; break; 628 case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0; break; 629 case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0; break; 630 case AArch64::CBZW: 631 case AArch64::CBNZW: 632 Opc = AArch64::CCMPWi; 633 FirstOp = 0; 634 isZBranch = true; 635 break; 636 case AArch64::CBZX: 637 case AArch64::CBNZX: 638 Opc = AArch64::CCMPXi; 639 FirstOp = 0; 640 isZBranch = true; 641 break; 642 } 643 644 // The ccmp instruction should set the flags according to the comparison when 645 // Head would have branched to CmpBB. 646 // The NZCV immediate operand should provide flags for the case where Head 647 // would have branched to Tail. These flags should cause the new Head 648 // terminator to branch to tail. 649 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC); 650 const MCInstrDesc &MCID = TII->get(Opc); 651 MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(), 652 TII->getRegClass(MCID, 0, TRI, *MF)); 653 if (CmpMI->getOperand(FirstOp + 1).isReg()) 654 MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(), 655 TII->getRegClass(MCID, 1, TRI, *MF)); 656 MachineInstrBuilder MIB = 657 BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID) 658 .addOperand(CmpMI->getOperand(FirstOp)); // Register Rn 659 if (isZBranch) 660 MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0 661 else 662 MIB.addOperand(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate 663 MIB.addImm(NZCV).addImm(HeadCmpBBCC); 664 665 // If CmpMI was a terminator, we need a new conditional branch to replace it. 666 // This now becomes a Head terminator. 667 if (isZBranch) { 668 bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW || 669 CmpMI->getOpcode() == AArch64::CBNZX; 670 BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc)) 671 .addImm(isNZ ? AArch64CC::NE : AArch64CC::EQ) 672 .addOperand(CmpMI->getOperand(1)); // Branch target. 673 } 674 CmpMI->eraseFromParent(); 675 Head->updateTerminator(); 676 677 RemovedBlocks.push_back(CmpBB); 678 CmpBB->eraseFromParent(); 679 DEBUG(dbgs() << "Result:\n" << *Head); 680 ++NumConverted; 681 } 682 683 int SSACCmpConv::expectedCodeSizeDelta() const { 684 int delta = 0; 685 // If the Head terminator was one of the cbz / tbz branches with built-in 686 // compare, we need to insert an explicit compare instruction in its place 687 // plus a branch instruction. 688 if (HeadCond[0].getImm() == -1) { 689 switch (HeadCond[1].getImm()) { 690 case AArch64::CBZW: 691 case AArch64::CBNZW: 692 case AArch64::CBZX: 693 case AArch64::CBNZX: 694 // Therefore delta += 1 695 delta = 1; 696 break; 697 default: 698 llvm_unreachable("Cannot convert Head branch"); 699 } 700 } 701 // If the Cmp terminator was one of the cbz / tbz branches with 702 // built-in compare, it will be turned into a compare instruction 703 // into Head, but we do not save any instruction. 704 // Otherwise, we save the branch instruction. 705 switch (CmpMI->getOpcode()) { 706 default: 707 --delta; 708 break; 709 case AArch64::CBZW: 710 case AArch64::CBNZW: 711 case AArch64::CBZX: 712 case AArch64::CBNZX: 713 break; 714 } 715 return delta; 716 } 717 718 //===----------------------------------------------------------------------===// 719 // AArch64ConditionalCompares Pass 720 //===----------------------------------------------------------------------===// 721 722 namespace { 723 class AArch64ConditionalCompares : public MachineFunctionPass { 724 const TargetInstrInfo *TII; 725 const TargetRegisterInfo *TRI; 726 MCSchedModel SchedModel; 727 // Does the proceeded function has Oz attribute. 728 bool MinSize; 729 MachineRegisterInfo *MRI; 730 MachineDominatorTree *DomTree; 731 MachineLoopInfo *Loops; 732 MachineTraceMetrics *Traces; 733 MachineTraceMetrics::Ensemble *MinInstr; 734 SSACCmpConv CmpConv; 735 736 public: 737 static char ID; 738 AArch64ConditionalCompares() : MachineFunctionPass(ID) {} 739 void getAnalysisUsage(AnalysisUsage &AU) const override; 740 bool runOnMachineFunction(MachineFunction &MF) override; 741 const char *getPassName() const override { 742 return "AArch64 Conditional Compares"; 743 } 744 745 private: 746 bool tryConvert(MachineBasicBlock *); 747 void updateDomTree(ArrayRef<MachineBasicBlock *> Removed); 748 void updateLoops(ArrayRef<MachineBasicBlock *> Removed); 749 void invalidateTraces(); 750 bool shouldConvert(); 751 }; 752 } // end anonymous namespace 753 754 char AArch64ConditionalCompares::ID = 0; 755 756 namespace llvm { 757 void initializeAArch64ConditionalComparesPass(PassRegistry &); 758 } 759 760 INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp", 761 "AArch64 CCMP Pass", false, false) 762 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) 763 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 764 INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) 765 INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp", 766 "AArch64 CCMP Pass", false, false) 767 768 FunctionPass *llvm::createAArch64ConditionalCompares() { 769 return new AArch64ConditionalCompares(); 770 } 771 772 void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const { 773 AU.addRequired<MachineBranchProbabilityInfo>(); 774 AU.addRequired<MachineDominatorTree>(); 775 AU.addPreserved<MachineDominatorTree>(); 776 AU.addRequired<MachineLoopInfo>(); 777 AU.addPreserved<MachineLoopInfo>(); 778 AU.addRequired<MachineTraceMetrics>(); 779 AU.addPreserved<MachineTraceMetrics>(); 780 MachineFunctionPass::getAnalysisUsage(AU); 781 } 782 783 /// Update the dominator tree after if-conversion erased some blocks. 784 void AArch64ConditionalCompares::updateDomTree( 785 ArrayRef<MachineBasicBlock *> Removed) { 786 // convert() removes CmpBB which was previously dominated by Head. 787 // CmpBB children should be transferred to Head. 788 MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head); 789 for (unsigned i = 0, e = Removed.size(); i != e; ++i) { 790 MachineDomTreeNode *Node = DomTree->getNode(Removed[i]); 791 assert(Node != HeadNode && "Cannot erase the head node"); 792 assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head"); 793 while (Node->getNumChildren()) 794 DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode); 795 DomTree->eraseNode(Removed[i]); 796 } 797 } 798 799 /// Update LoopInfo after if-conversion. 800 void 801 AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) { 802 if (!Loops) 803 return; 804 for (unsigned i = 0, e = Removed.size(); i != e; ++i) 805 Loops->removeBlock(Removed[i]); 806 } 807 808 /// Invalidate MachineTraceMetrics before if-conversion. 809 void AArch64ConditionalCompares::invalidateTraces() { 810 Traces->invalidate(CmpConv.Head); 811 Traces->invalidate(CmpConv.CmpBB); 812 } 813 814 /// Apply cost model and heuristics to the if-conversion in IfConv. 815 /// Return true if the conversion is a good idea. 816 /// 817 bool AArch64ConditionalCompares::shouldConvert() { 818 // Stress testing mode disables all cost considerations. 819 if (Stress) 820 return true; 821 if (!MinInstr) 822 MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); 823 824 // Head dominates CmpBB, so it is always included in its trace. 825 MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB); 826 827 // If code size is the main concern 828 if (MinSize) { 829 int CodeSizeDelta = CmpConv.expectedCodeSizeDelta(); 830 DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n'); 831 // If we are minimizing the code size, do the conversion whatever 832 // the cost is. 833 if (CodeSizeDelta < 0) 834 return true; 835 if (CodeSizeDelta > 0) { 836 DEBUG(dbgs() << "Code size is increasing, give up on this one.\n"); 837 return false; 838 } 839 // CodeSizeDelta == 0, continue with the regular heuristics 840 } 841 842 // Heuristic: The compare conversion delays the execution of the branch 843 // instruction because we must wait for the inputs to the second compare as 844 // well. The branch has no dependent instructions, but delaying it increases 845 // the cost of a misprediction. 846 // 847 // Set a limit on the delay we will accept. 848 unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4; 849 850 // Instruction depths can be computed for all trace instructions above CmpBB. 851 unsigned HeadDepth = 852 Trace.getInstrCycles(CmpConv.Head->getFirstTerminator()).Depth; 853 unsigned CmpBBDepth = 854 Trace.getInstrCycles(CmpConv.CmpBB->getFirstTerminator()).Depth; 855 DEBUG(dbgs() << "Head depth: " << HeadDepth 856 << "\nCmpBB depth: " << CmpBBDepth << '\n'); 857 if (CmpBBDepth > HeadDepth + DelayLimit) { 858 DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit 859 << " cycles.\n"); 860 return false; 861 } 862 863 // Check the resource depth at the bottom of CmpBB - these instructions will 864 // be speculated. 865 unsigned ResDepth = Trace.getResourceDepth(true); 866 DEBUG(dbgs() << "Resources: " << ResDepth << '\n'); 867 868 // Heuristic: The speculatively executed instructions must all be able to 869 // merge into the Head block. The Head critical path should dominate the 870 // resource cost of the speculated instructions. 871 if (ResDepth > HeadDepth) { 872 DEBUG(dbgs() << "Too many instructions to speculate.\n"); 873 return false; 874 } 875 return true; 876 } 877 878 bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) { 879 bool Changed = false; 880 while (CmpConv.canConvert(MBB) && shouldConvert()) { 881 invalidateTraces(); 882 SmallVector<MachineBasicBlock *, 4> RemovedBlocks; 883 CmpConv.convert(RemovedBlocks); 884 Changed = true; 885 updateDomTree(RemovedBlocks); 886 updateLoops(RemovedBlocks); 887 } 888 return Changed; 889 } 890 891 bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) { 892 DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n" 893 << "********** Function: " << MF.getName() << '\n'); 894 TII = MF.getSubtarget().getInstrInfo(); 895 TRI = MF.getSubtarget().getRegisterInfo(); 896 SchedModel = MF.getSubtarget().getSchedModel(); 897 MRI = &MF.getRegInfo(); 898 DomTree = &getAnalysis<MachineDominatorTree>(); 899 Loops = getAnalysisIfAvailable<MachineLoopInfo>(); 900 Traces = &getAnalysis<MachineTraceMetrics>(); 901 MinInstr = nullptr; 902 MinSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize); 903 904 bool Changed = false; 905 CmpConv.runOnMachineFunction(MF); 906 907 // Visit blocks in dominator tree pre-order. The pre-order enables multiple 908 // cmp-conversions from the same head block. 909 // Note that updateDomTree() modifies the children of the DomTree node 910 // currently being visited. The df_iterator supports that; it doesn't look at 911 // child_begin() / child_end() until after a node has been visited. 912 for (auto *I : depth_first(DomTree)) 913 if (tryConvert(I->getBlock())) 914 Changed = true; 915 916 return Changed; 917 } 918