1 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains an optimization for div and rem on architectures that 11 // execute short instructions significantly faster than longer instructions. 12 // For example, on Intel Atom 32-bit divides are slow enough that during 13 // runtime it is profitable to check the value of the operands, and if they are 14 // positive and less than 256 use an unsigned 8-bit divide. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "llvm/Transforms/Utils/BypassSlowDivision.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/IR/Function.h" 21 #include "llvm/IR/IRBuilder.h" 22 #include "llvm/IR/Instructions.h" 23 24 using namespace llvm; 25 26 #define DEBUG_TYPE "bypass-slow-division" 27 28 namespace { 29 struct DivOpInfo { 30 bool SignedOp; 31 Value *Dividend; 32 Value *Divisor; 33 34 DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor) 35 : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} 36 }; 37 38 struct DivPhiNodes { 39 PHINode *Quotient; 40 PHINode *Remainder; 41 42 DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) 43 : Quotient(InQuotient), Remainder(InRemainder) {} 44 }; 45 } 46 47 namespace llvm { 48 template<> 49 struct DenseMapInfo<DivOpInfo> { 50 static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) { 51 return Val1.SignedOp == Val2.SignedOp && 52 Val1.Dividend == Val2.Dividend && 53 Val1.Divisor == Val2.Divisor; 54 } 55 56 static DivOpInfo getEmptyKey() { 57 return DivOpInfo(false, nullptr, nullptr); 58 } 59 60 static DivOpInfo getTombstoneKey() { 61 return DivOpInfo(true, nullptr, nullptr); 62 } 63 64 static unsigned getHashValue(const DivOpInfo &Val) { 65 return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^ 66 reinterpret_cast<uintptr_t>(Val.Divisor)) ^ 67 (unsigned)Val.SignedOp; 68 } 69 }; 70 71 typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy; 72 } 73 74 // insertFastDiv - Substitutes the div/rem instruction with code that checks the 75 // value of the operands and uses a shorter-faster div/rem instruction when 76 // possible and the longer-slower div/rem instruction otherwise. 77 static bool insertFastDiv(Instruction *I, IntegerType *BypassType, 78 bool UseDivOp, bool UseSignedOp, 79 DivCacheTy &PerBBDivCache) { 80 Function *F = I->getParent()->getParent(); 81 // Get instruction operands 82 Value *Dividend = I->getOperand(0); 83 Value *Divisor = I->getOperand(1); 84 85 if (isa<ConstantInt>(Divisor) || 86 (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) { 87 // Operations with immediate values should have 88 // been solved and replaced during compile time. 89 return false; 90 } 91 92 // Basic Block is split before divide 93 BasicBlock *MainBB = &*I->getParent(); 94 BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I); 95 96 // Add new basic block for slow divide operation 97 BasicBlock *SlowBB = 98 BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); 99 SlowBB->moveBefore(SuccessorBB); 100 IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); 101 Value *SlowQuotientV; 102 Value *SlowRemainderV; 103 if (UseSignedOp) { 104 SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); 105 SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); 106 } else { 107 SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); 108 SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); 109 } 110 SlowBuilder.CreateBr(SuccessorBB); 111 112 // Add new basic block for fast divide operation 113 BasicBlock *FastBB = 114 BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); 115 FastBB->moveBefore(SlowBB); 116 IRBuilder<> FastBuilder(FastBB, FastBB->begin()); 117 Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, 118 BypassType); 119 Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, 120 BypassType); 121 122 // udiv/urem because optimization only handles positive numbers 123 Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV, 124 ShortDivisorV); 125 Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, 126 ShortDivisorV); 127 Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt, 128 ShortQuotientV, 129 Dividend->getType()); 130 Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, 131 ShortRemainderV, 132 Dividend->getType()); 133 FastBuilder.CreateBr(SuccessorBB); 134 135 // Phi nodes for result of div and rem 136 IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); 137 PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); 138 QuoPhi->addIncoming(SlowQuotientV, SlowBB); 139 QuoPhi->addIncoming(FastQuotientV, FastBB); 140 PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); 141 RemPhi->addIncoming(SlowRemainderV, SlowBB); 142 RemPhi->addIncoming(FastRemainderV, FastBB); 143 144 // Replace I with appropriate phi node 145 if (UseDivOp) 146 I->replaceAllUsesWith(QuoPhi); 147 else 148 I->replaceAllUsesWith(RemPhi); 149 I->eraseFromParent(); 150 151 // Combine operands into a single value with OR for value testing below 152 MainBB->getInstList().back().eraseFromParent(); 153 IRBuilder<> MainBuilder(MainBB, MainBB->end()); 154 Value *OrV = MainBuilder.CreateOr(Dividend, Divisor); 155 156 // BitMask is inverted to check if the operands are 157 // larger than the bypass type 158 uint64_t BitMask = ~BypassType->getBitMask(); 159 Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); 160 161 // Compare operand values and branch 162 Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0); 163 Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); 164 MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); 165 166 // Cache phi nodes to be used later in place of other instances 167 // of div or rem with the same sign, dividend, and divisor 168 DivOpInfo Key(UseSignedOp, Dividend, Divisor); 169 DivPhiNodes Value(QuoPhi, RemPhi); 170 PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value)); 171 return true; 172 } 173 174 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from 175 // the current BB if operands and operation are identical. Otherwise calls 176 // insertFastDiv to perform the optimization and caches the resulting dividend 177 // and remainder. 178 static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType, 179 bool UseDivOp, bool UseSignedOp, 180 DivCacheTy &PerBBDivCache) { 181 // Get instruction operands 182 DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1)); 183 DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); 184 185 if (CacheI == PerBBDivCache.end()) { 186 // If previous instance does not exist, insert fast div 187 return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache); 188 } 189 190 // Replace operation value with previously generated phi node 191 DivPhiNodes &Value = CacheI->second; 192 if (UseDivOp) { 193 // Replace all uses of div instruction with quotient phi node 194 I->replaceAllUsesWith(Value.Quotient); 195 } else { 196 // Replace all uses of rem instruction with remainder phi node 197 I->replaceAllUsesWith(Value.Remainder); 198 } 199 200 // Remove redundant operation 201 I->eraseFromParent(); 202 return true; 203 } 204 205 // bypassSlowDivision - This optimization identifies DIV instructions in a BB 206 // that can be profitably bypassed and carried out with a shorter, faster 207 // divide. 208 bool llvm::bypassSlowDivision( 209 BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) { 210 DivCacheTy DivCache; 211 212 bool MadeChange = false; 213 Instruction* Next = &*BB->begin(); 214 while (Next != nullptr) { 215 // We may add instructions immediately after I, but we want to skip over 216 // them. 217 Instruction* I = Next; 218 Next = Next->getNextNode(); 219 220 // Get instruction details 221 unsigned Opcode = I->getOpcode(); 222 bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; 223 bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; 224 bool UseSignedOp = Opcode == Instruction::SDiv || 225 Opcode == Instruction::SRem; 226 227 // Only optimize div or rem ops 228 if (!UseDivOp && !UseRemOp) 229 continue; 230 231 // Skip division on vector types, only optimize integer instructions 232 if (!I->getType()->isIntegerTy()) 233 continue; 234 235 // Get bitwidth of div/rem instruction 236 IntegerType *T = cast<IntegerType>(I->getType()); 237 unsigned int bitwidth = T->getBitWidth(); 238 239 // Continue if bitwidth is not bypassed 240 DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth); 241 if (BI == BypassWidths.end()) 242 continue; 243 244 // Get type for div/rem instruction with bypass bitwidth 245 IntegerType *BT = IntegerType::get(I->getContext(), BI->second); 246 247 MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache); 248 } 249 250 return MadeChange; 251 } 252