1 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains an optimization for div and rem on architectures that 11 // execute short instructions significantly faster than longer instructions. 12 // For example, on Intel Atom 32-bit divides are slow enough that during 13 // runtime it is profitable to check the value of the operands, and if they are 14 // positive and less than 256 use an unsigned 8-bit divide. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "llvm/Transforms/Utils/BypassSlowDivision.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/IR/Function.h" 21 #include "llvm/IR/IRBuilder.h" 22 #include "llvm/IR/Instructions.h" 23 24 using namespace llvm; 25 26 #define DEBUG_TYPE "bypass-slow-division" 27 28 namespace { 29 struct DivOpInfo { 30 bool SignedOp; 31 Value *Dividend; 32 Value *Divisor; 33 34 DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor) 35 : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} 36 }; 37 38 struct DivPhiNodes { 39 PHINode *Quotient; 40 PHINode *Remainder; 41 42 DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) 43 : Quotient(InQuotient), Remainder(InRemainder) {} 44 }; 45 } 46 47 namespace llvm { 48 template<> 49 struct DenseMapInfo<DivOpInfo> { 50 static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) { 51 return Val1.SignedOp == Val2.SignedOp && 52 Val1.Dividend == Val2.Dividend && 53 Val1.Divisor == Val2.Divisor; 54 } 55 56 static DivOpInfo getEmptyKey() { 57 return DivOpInfo(false, nullptr, nullptr); 58 } 59 60 static DivOpInfo getTombstoneKey() { 61 return DivOpInfo(true, nullptr, nullptr); 62 } 63 64 static unsigned getHashValue(const DivOpInfo &Val) { 65 return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^ 66 reinterpret_cast<uintptr_t>(Val.Divisor)) ^ 67 (unsigned)Val.SignedOp; 68 } 69 }; 70 71 typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy; 72 } 73 74 // insertFastDiv - Substitutes the div/rem instruction with code that checks the 75 // value of the operands and uses a shorter-faster div/rem instruction when 76 // possible and the longer-slower div/rem instruction otherwise. 77 static bool insertFastDiv(Function &F, 78 Function::iterator &I, 79 BasicBlock::iterator &J, 80 IntegerType *BypassType, 81 bool UseDivOp, 82 bool UseSignedOp, 83 DivCacheTy &PerBBDivCache) { 84 // Get instruction operands 85 Instruction *Instr = J; 86 Value *Dividend = Instr->getOperand(0); 87 Value *Divisor = Instr->getOperand(1); 88 89 if (isa<ConstantInt>(Divisor) || 90 (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) { 91 // Operations with immediate values should have 92 // been solved and replaced during compile time. 93 return false; 94 } 95 96 // Basic Block is split before divide 97 BasicBlock *MainBB = I; 98 BasicBlock *SuccessorBB = I->splitBasicBlock(J); 99 ++I; //advance iterator I to successorBB 100 101 // Add new basic block for slow divide operation 102 BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "", 103 MainBB->getParent(), SuccessorBB); 104 SlowBB->moveBefore(SuccessorBB); 105 IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); 106 Value *SlowQuotientV; 107 Value *SlowRemainderV; 108 if (UseSignedOp) { 109 SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); 110 SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); 111 } else { 112 SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); 113 SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); 114 } 115 SlowBuilder.CreateBr(SuccessorBB); 116 117 // Add new basic block for fast divide operation 118 BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "", 119 MainBB->getParent(), SuccessorBB); 120 FastBB->moveBefore(SlowBB); 121 IRBuilder<> FastBuilder(FastBB, FastBB->begin()); 122 Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, 123 BypassType); 124 Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, 125 BypassType); 126 127 // udiv/urem because optimization only handles positive numbers 128 Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV, 129 ShortDivisorV); 130 Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, 131 ShortDivisorV); 132 Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt, 133 ShortQuotientV, 134 Dividend->getType()); 135 Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, 136 ShortRemainderV, 137 Dividend->getType()); 138 FastBuilder.CreateBr(SuccessorBB); 139 140 // Phi nodes for result of div and rem 141 IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); 142 PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); 143 QuoPhi->addIncoming(SlowQuotientV, SlowBB); 144 QuoPhi->addIncoming(FastQuotientV, FastBB); 145 PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); 146 RemPhi->addIncoming(SlowRemainderV, SlowBB); 147 RemPhi->addIncoming(FastRemainderV, FastBB); 148 149 // Replace Instr with appropriate phi node 150 if (UseDivOp) 151 Instr->replaceAllUsesWith(QuoPhi); 152 else 153 Instr->replaceAllUsesWith(RemPhi); 154 Instr->eraseFromParent(); 155 156 // Combine operands into a single value with OR for value testing below 157 MainBB->getInstList().back().eraseFromParent(); 158 IRBuilder<> MainBuilder(MainBB, MainBB->end()); 159 Value *OrV = MainBuilder.CreateOr(Dividend, Divisor); 160 161 // BitMask is inverted to check if the operands are 162 // larger than the bypass type 163 uint64_t BitMask = ~BypassType->getBitMask(); 164 Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); 165 166 // Compare operand values and branch 167 Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0); 168 Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); 169 MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); 170 171 // point iterator J at first instruction of successorBB 172 J = I->begin(); 173 174 // Cache phi nodes to be used later in place of other instances 175 // of div or rem with the same sign, dividend, and divisor 176 DivOpInfo Key(UseSignedOp, Dividend, Divisor); 177 DivPhiNodes Value(QuoPhi, RemPhi); 178 PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value)); 179 return true; 180 } 181 182 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if 183 // operands and operation are identical. Otherwise call insertFastDiv to perform 184 // the optimization and cache the resulting dividend and remainder. 185 static bool reuseOrInsertFastDiv(Function &F, 186 Function::iterator &I, 187 BasicBlock::iterator &J, 188 IntegerType *BypassType, 189 bool UseDivOp, 190 bool UseSignedOp, 191 DivCacheTy &PerBBDivCache) { 192 // Get instruction operands 193 Instruction *Instr = J; 194 DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1)); 195 DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); 196 197 if (CacheI == PerBBDivCache.end()) { 198 // If previous instance does not exist, insert fast div 199 return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, 200 PerBBDivCache); 201 } 202 203 // Replace operation value with previously generated phi node 204 DivPhiNodes &Value = CacheI->second; 205 if (UseDivOp) { 206 // Replace all uses of div instruction with quotient phi node 207 J->replaceAllUsesWith(Value.Quotient); 208 } else { 209 // Replace all uses of rem instruction with remainder phi node 210 J->replaceAllUsesWith(Value.Remainder); 211 } 212 213 // Advance to next operation 214 ++J; 215 216 // Remove redundant operation 217 Instr->eraseFromParent(); 218 return true; 219 } 220 221 // bypassSlowDivision - This optimization identifies DIV instructions that can 222 // be profitably bypassed and carried out with a shorter, faster divide. 223 bool llvm::bypassSlowDivision(Function &F, 224 Function::iterator &I, 225 const DenseMap<unsigned int, unsigned int> &BypassWidths) { 226 DivCacheTy DivCache; 227 228 bool MadeChange = false; 229 for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) { 230 231 // Get instruction details 232 unsigned Opcode = J->getOpcode(); 233 bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; 234 bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; 235 bool UseSignedOp = Opcode == Instruction::SDiv || 236 Opcode == Instruction::SRem; 237 238 // Only optimize div or rem ops 239 if (!UseDivOp && !UseRemOp) 240 continue; 241 242 // Skip division on vector types, only optimize integer instructions 243 if (!J->getType()->isIntegerTy()) 244 continue; 245 246 // Get bitwidth of div/rem instruction 247 IntegerType *T = cast<IntegerType>(J->getType()); 248 unsigned int bitwidth = T->getBitWidth(); 249 250 // Continue if bitwidth is not bypassed 251 DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth); 252 if (BI == BypassWidths.end()) 253 continue; 254 255 // Get type for div/rem instruction with bypass bitwidth 256 IntegerType *BT = IntegerType::get(J->getContext(), BI->second); 257 258 MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp, 259 UseSignedOp, DivCache); 260 } 261 262 return MadeChange; 263 } 264