1 //===- InlineCost.cpp - Cost analysis for inliner -------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements inline cost analysis. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Analysis/InlineCost.h" 15 #include "llvm/Support/CallSite.h" 16 #include "llvm/CallingConv.h" 17 #include "llvm/IntrinsicInst.h" 18 #include "llvm/Target/TargetData.h" 19 #include "llvm/ADT/SmallPtrSet.h" 20 21 using namespace llvm; 22 23 /// callIsSmall - If a call is likely to lower to a single target instruction, 24 /// or is otherwise deemed small return true. 25 /// TODO: Perhaps calls like memcpy, strcpy, etc? 26 bool llvm::callIsSmall(const Function *F) { 27 if (!F) return false; 28 29 if (F->hasLocalLinkage()) return false; 30 31 if (!F->hasName()) return false; 32 33 StringRef Name = F->getName(); 34 35 // These will all likely lower to a single selection DAG node. 36 if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || 37 Name == "fabs" || Name == "fabsf" || Name == "fabsl" || 38 Name == "sin" || Name == "sinf" || Name == "sinl" || 39 Name == "cos" || Name == "cosf" || Name == "cosl" || 40 Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ) 41 return true; 42 43 // These are all likely to be optimized into something smaller. 44 if (Name == "pow" || Name == "powf" || Name == "powl" || 45 Name == "exp2" || Name == "exp2l" || Name == "exp2f" || 46 Name == "floor" || Name == "floorf" || Name == "ceil" || 47 Name == "round" || Name == "ffs" || Name == "ffsl" || 48 Name == "abs" || Name == "labs" || Name == "llabs") 49 return true; 50 51 return false; 52 } 53 54 /// analyzeBasicBlock - Fill in the current structure with information gleaned 55 /// from the specified block. 56 void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, 57 const TargetData *TD) { 58 ++NumBlocks; 59 unsigned NumInstsBeforeThisBB = NumInsts; 60 for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); 61 II != E; ++II) { 62 if (isa<PHINode>(II)) continue; // PHI nodes don't count. 63 64 // Special handling for calls. 65 if (isa<CallInst>(II) || isa<InvokeInst>(II)) { 66 if (isa<DbgInfoIntrinsic>(II)) 67 continue; // Debug intrinsics don't count as size. 68 69 ImmutableCallSite CS(cast<Instruction>(II)); 70 71 if (const Function *F = CS.getCalledFunction()) { 72 // If a function is both internal and has a single use, then it is 73 // extremely likely to get inlined in the future (it was probably 74 // exposed by an interleaved devirtualization pass). 75 if (F->hasInternalLinkage() && F->hasOneUse()) 76 ++NumInlineCandidates; 77 78 // If this call is to function itself, then the function is recursive. 79 // Inlining it into other functions is a bad idea, because this is 80 // basically just a form of loop peeling, and our metrics aren't useful 81 // for that case. 82 if (F == BB->getParent()) 83 isRecursive = true; 84 } 85 86 if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) { 87 // Each argument to a call takes on average one instruction to set up. 88 NumInsts += CS.arg_size(); 89 90 // We don't want inline asm to count as a call - that would prevent loop 91 // unrolling. The argument setup cost is still real, though. 92 if (!isa<InlineAsm>(CS.getCalledValue())) 93 ++NumCalls; 94 } 95 } 96 97 if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { 98 if (!AI->isStaticAlloca()) 99 this->usesDynamicAlloca = true; 100 } 101 102 if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy()) 103 ++NumVectorInsts; 104 105 if (const CastInst *CI = dyn_cast<CastInst>(II)) { 106 // Noop casts, including ptr <-> int, don't count. 107 if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || 108 isa<PtrToIntInst>(CI)) 109 continue; 110 // trunc to a native type is free (assuming the target has compare and 111 // shift-right of the same width). 112 if (isa<TruncInst>(CI) && TD && 113 TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()))) 114 continue; 115 // Result of a cmp instruction is often extended (to be used by other 116 // cmp instructions, logical or return instructions). These are usually 117 // nop on most sane targets. 118 if (isa<CmpInst>(CI->getOperand(0))) 119 continue; 120 } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){ 121 // If a GEP has all constant indices, it will probably be folded with 122 // a load/store. 123 if (GEPI->hasAllConstantIndices()) 124 continue; 125 } 126 127 ++NumInsts; 128 } 129 130 if (isa<ReturnInst>(BB->getTerminator())) 131 ++NumRets; 132 133 // We never want to inline functions that contain an indirectbr. This is 134 // incorrect because all the blockaddress's (in static global initializers 135 // for example) would be referring to the original function, and this indirect 136 // jump would jump from the inlined copy of the function into the original 137 // function which is extremely undefined behavior. 138 if (isa<IndirectBrInst>(BB->getTerminator())) 139 containsIndirectBr = true; 140 141 // Remember NumInsts for this BB. 142 NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB; 143 } 144 145 // CountCodeReductionForConstant - Figure out an approximation for how many 146 // instructions will be constant folded if the specified value is constant. 147 // 148 unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) { 149 unsigned Reduction = 0; 150 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ 151 User *U = *UI; 152 if (isa<BranchInst>(U) || isa<SwitchInst>(U)) { 153 // We will be able to eliminate all but one of the successors. 154 const TerminatorInst &TI = cast<TerminatorInst>(*U); 155 const unsigned NumSucc = TI.getNumSuccessors(); 156 unsigned Instrs = 0; 157 for (unsigned I = 0; I != NumSucc; ++I) 158 Instrs += NumBBInsts[TI.getSuccessor(I)]; 159 // We don't know which blocks will be eliminated, so use the average size. 160 Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc; 161 } else { 162 // Figure out if this instruction will be removed due to simple constant 163 // propagation. 164 Instruction &Inst = cast<Instruction>(*U); 165 166 // We can't constant propagate instructions which have effects or 167 // read memory. 168 // 169 // FIXME: It would be nice to capture the fact that a load from a 170 // pointer-to-constant-global is actually a *really* good thing to zap. 171 // Unfortunately, we don't know the pointer that may get propagated here, 172 // so we can't make this decision. 173 if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() || 174 isa<AllocaInst>(Inst)) 175 continue; 176 177 bool AllOperandsConstant = true; 178 for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) 179 if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) { 180 AllOperandsConstant = false; 181 break; 182 } 183 184 if (AllOperandsConstant) { 185 // We will get to remove this instruction... 186 Reduction += InlineConstants::InstrCost; 187 188 // And any other instructions that use it which become constants 189 // themselves. 190 Reduction += CountCodeReductionForConstant(&Inst); 191 } 192 } 193 } 194 return Reduction; 195 } 196 197 // CountCodeReductionForAlloca - Figure out an approximation of how much smaller 198 // the function will be if it is inlined into a context where an argument 199 // becomes an alloca. 200 // 201 unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) { 202 if (!V->getType()->isPointerTy()) return 0; // Not a pointer 203 unsigned Reduction = 0; 204 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ 205 Instruction *I = cast<Instruction>(*UI); 206 if (isa<LoadInst>(I) || isa<StoreInst>(I)) 207 Reduction += InlineConstants::InstrCost; 208 else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { 209 // If the GEP has variable indices, we won't be able to do much with it. 210 if (GEP->hasAllConstantIndices()) 211 Reduction += CountCodeReductionForAlloca(GEP); 212 } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) { 213 // Track pointer through bitcasts. 214 Reduction += CountCodeReductionForAlloca(BCI); 215 } else { 216 // If there is some other strange instruction, we're not going to be able 217 // to do much if we inline this. 218 return 0; 219 } 220 } 221 222 return Reduction; 223 } 224 225 /// analyzeFunction - Fill in the current structure with information gleaned 226 /// from the specified function. 227 void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) { 228 // If this function contains a call to setjmp or _setjmp, never inline 229 // it. This is a hack because we depend on the user marking their local 230 // variables as volatile if they are live across a setjmp call, and they 231 // probably won't do this in callers. 232 if (F->callsFunctionThatReturnsTwice()) 233 callsSetJmp = true; 234 235 // Look at the size of the callee. 236 for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) 237 analyzeBasicBlock(&*BB, TD); 238 } 239 240 /// analyzeFunction - Fill in the current structure with information gleaned 241 /// from the specified function. 242 void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F, 243 const TargetData *TD) { 244 Metrics.analyzeFunction(F, TD); 245 246 // A function with exactly one return has it removed during the inlining 247 // process (see InlineFunction), so don't count it. 248 // FIXME: This knowledge should really be encoded outside of FunctionInfo. 249 if (Metrics.NumRets==1) 250 --Metrics.NumInsts; 251 252 // Check out all of the arguments to the function, figuring out how much 253 // code can be eliminated if one of the arguments is a constant. 254 ArgumentWeights.reserve(F->arg_size()); 255 for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) 256 ArgumentWeights.push_back(ArgInfo(Metrics.CountCodeReductionForConstant(I), 257 Metrics.CountCodeReductionForAlloca(I))); 258 } 259 260 /// NeverInline - returns true if the function should never be inlined into 261 /// any caller 262 bool InlineCostAnalyzer::FunctionInfo::NeverInline() { 263 return (Metrics.callsSetJmp || Metrics.isRecursive || 264 Metrics.containsIndirectBr); 265 } 266 // getSpecializationBonus - The heuristic used to determine the per-call 267 // performance boost for using a specialization of Callee with argument 268 // specializedArgNo replaced by a constant. 269 int InlineCostAnalyzer::getSpecializationBonus(Function *Callee, 270 SmallVectorImpl<unsigned> &SpecializedArgNos) 271 { 272 if (Callee->mayBeOverridden()) 273 return 0; 274 275 int Bonus = 0; 276 // If this function uses the coldcc calling convention, prefer not to 277 // specialize it. 278 if (Callee->getCallingConv() == CallingConv::Cold) 279 Bonus -= InlineConstants::ColdccPenalty; 280 281 // Get information about the callee. 282 FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; 283 284 // If we haven't calculated this information yet, do so now. 285 if (CalleeFI->Metrics.NumBlocks == 0) 286 CalleeFI->analyzeFunction(Callee, TD); 287 288 unsigned ArgNo = 0; 289 unsigned i = 0; 290 for (Function::arg_iterator I = Callee->arg_begin(), E = Callee->arg_end(); 291 I != E; ++I, ++ArgNo) 292 if (ArgNo == SpecializedArgNos[i]) { 293 ++i; 294 Bonus += CountBonusForConstant(I); 295 } 296 297 // Calls usually take a long time, so they make the specialization gain 298 // smaller. 299 Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty; 300 301 return Bonus; 302 } 303 304 // ConstantFunctionBonus - Figure out how much of a bonus we can get for 305 // possibly devirtualizing a function. We'll subtract the size of the function 306 // we may wish to inline from the indirect call bonus providing a limit on 307 // growth. Leave an upper limit of 0 for the bonus - we don't want to penalize 308 // inlining because we decide we don't want to give a bonus for 309 // devirtualizing. 310 int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) { 311 312 // This could just be NULL. 313 if (!C) return 0; 314 315 Function *F = dyn_cast<Function>(C); 316 if (!F) return 0; 317 318 int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F); 319 return (Bonus > 0) ? 0 : Bonus; 320 } 321 322 // CountBonusForConstant - Figure out an approximation for how much per-call 323 // performance boost we can expect if the specified value is constant. 324 int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) { 325 unsigned Bonus = 0; 326 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ 327 User *U = *UI; 328 if (CallInst *CI = dyn_cast<CallInst>(U)) { 329 // Turning an indirect call into a direct call is a BIG win 330 if (CI->getCalledValue() == V) 331 Bonus += ConstantFunctionBonus(CallSite(CI), C); 332 } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) { 333 // Turning an indirect call into a direct call is a BIG win 334 if (II->getCalledValue() == V) 335 Bonus += ConstantFunctionBonus(CallSite(II), C); 336 } 337 // FIXME: Eliminating conditional branches and switches should 338 // also yield a per-call performance boost. 339 else { 340 // Figure out the bonuses that wll accrue due to simple constant 341 // propagation. 342 Instruction &Inst = cast<Instruction>(*U); 343 344 // We can't constant propagate instructions which have effects or 345 // read memory. 346 // 347 // FIXME: It would be nice to capture the fact that a load from a 348 // pointer-to-constant-global is actually a *really* good thing to zap. 349 // Unfortunately, we don't know the pointer that may get propagated here, 350 // so we can't make this decision. 351 if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() || 352 isa<AllocaInst>(Inst)) 353 continue; 354 355 bool AllOperandsConstant = true; 356 for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) 357 if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) { 358 AllOperandsConstant = false; 359 break; 360 } 361 362 if (AllOperandsConstant) 363 Bonus += CountBonusForConstant(&Inst); 364 } 365 } 366 367 return Bonus; 368 } 369 370 int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) { 371 // Get information about the callee. 372 FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; 373 374 // If we haven't calculated this information yet, do so now. 375 if (CalleeFI->Metrics.NumBlocks == 0) 376 CalleeFI->analyzeFunction(Callee, TD); 377 378 // InlineCost - This value measures how good of an inline candidate this call 379 // site is to inline. A lower inline cost make is more likely for the call to 380 // be inlined. This value may go negative. 381 // 382 int InlineCost = 0; 383 384 // Compute any size reductions we can expect due to arguments being passed into 385 // the function. 386 // 387 unsigned ArgNo = 0; 388 CallSite::arg_iterator I = CS.arg_begin(); 389 for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end(); 390 FI != FE; ++I, ++FI, ++ArgNo) { 391 392 // If an alloca is passed in, inlining this function is likely to allow 393 // significant future optimization possibilities (like scalar promotion, and 394 // scalarization), so encourage the inlining of the function. 395 // 396 if (isa<AllocaInst>(I)) 397 InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight; 398 399 // If this is a constant being passed into the function, use the argument 400 // weights calculated for the callee to determine how much will be folded 401 // away with this information. 402 else if (isa<Constant>(I)) 403 InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight; 404 } 405 406 // Each argument passed in has a cost at both the caller and the callee 407 // sides. Measurements show that each argument costs about the same as an 408 // instruction. 409 InlineCost -= (CS.arg_size() * InlineConstants::InstrCost); 410 411 // Now that we have considered all of the factors that make the call site more 412 // likely to be inlined, look at factors that make us not want to inline it. 413 414 // Calls usually take a long time, so they make the inlining gain smaller. 415 InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty; 416 417 // Look at the size of the callee. Each instruction counts as 5. 418 InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost; 419 420 return InlineCost; 421 } 422 423 int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) { 424 // Get information about the callee. 425 FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; 426 427 // If we haven't calculated this information yet, do so now. 428 if (CalleeFI->Metrics.NumBlocks == 0) 429 CalleeFI->analyzeFunction(Callee, TD); 430 431 bool isDirectCall = CS.getCalledFunction() == Callee; 432 Instruction *TheCall = CS.getInstruction(); 433 int Bonus = 0; 434 435 // If there is only one call of the function, and it has internal linkage, 436 // make it almost guaranteed to be inlined. 437 // 438 if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall) 439 Bonus += InlineConstants::LastCallToStaticBonus; 440 441 // If the instruction after the call, or if the normal destination of the 442 // invoke is an unreachable instruction, the function is noreturn. As such, 443 // there is little point in inlining this. 444 if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { 445 if (isa<UnreachableInst>(II->getNormalDest()->begin())) 446 Bonus += InlineConstants::NoreturnPenalty; 447 } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall))) 448 Bonus += InlineConstants::NoreturnPenalty; 449 450 // If this function uses the coldcc calling convention, prefer not to inline 451 // it. 452 if (Callee->getCallingConv() == CallingConv::Cold) 453 Bonus += InlineConstants::ColdccPenalty; 454 455 // Add to the inline quality for properties that make the call valuable to 456 // inline. This includes factors that indicate that the result of inlining 457 // the function will be optimizable. Currently this just looks at arguments 458 // passed into the function. 459 // 460 CallSite::arg_iterator I = CS.arg_begin(); 461 for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end(); 462 FI != FE; ++I, ++FI) 463 // Compute any constant bonus due to inlining we want to give here. 464 if (isa<Constant>(I)) 465 Bonus += CountBonusForConstant(FI, cast<Constant>(I)); 466 467 return Bonus; 468 } 469 470 // getInlineCost - The heuristic used to determine if we should inline the 471 // function call or not. 472 // 473 InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, 474 SmallPtrSet<const Function*, 16> &NeverInline) { 475 return getInlineCost(CS, CS.getCalledFunction(), NeverInline); 476 } 477 478 InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, 479 Function *Callee, 480 SmallPtrSet<const Function*, 16> &NeverInline) { 481 Instruction *TheCall = CS.getInstruction(); 482 Function *Caller = TheCall->getParent()->getParent(); 483 484 // Don't inline functions which can be redefined at link-time to mean 485 // something else. Don't inline functions marked noinline or call sites 486 // marked noinline. 487 if (Callee->mayBeOverridden() || 488 Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) || 489 CS.isNoInline()) 490 return llvm::InlineCost::getNever(); 491 492 // Get information about the callee. 493 FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; 494 495 // If we haven't calculated this information yet, do so now. 496 if (CalleeFI->Metrics.NumBlocks == 0) 497 CalleeFI->analyzeFunction(Callee, TD); 498 499 // If we should never inline this, return a huge cost. 500 if (CalleeFI->NeverInline()) 501 return InlineCost::getNever(); 502 503 // FIXME: It would be nice to kill off CalleeFI->NeverInline. Then we 504 // could move this up and avoid computing the FunctionInfo for 505 // things we are going to just return always inline for. This 506 // requires handling setjmp somewhere else, however. 507 if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline)) 508 return InlineCost::getAlways(); 509 510 if (CalleeFI->Metrics.usesDynamicAlloca) { 511 // Get information about the caller. 512 FunctionInfo &CallerFI = CachedFunctionInfo[Caller]; 513 514 // If we haven't calculated this information yet, do so now. 515 if (CallerFI.Metrics.NumBlocks == 0) { 516 CallerFI.analyzeFunction(Caller, TD); 517 518 // Recompute the CalleeFI pointer, getting Caller could have invalidated 519 // it. 520 CalleeFI = &CachedFunctionInfo[Callee]; 521 } 522 523 // Don't inline a callee with dynamic alloca into a caller without them. 524 // Functions containing dynamic alloca's are inefficient in various ways; 525 // don't create more inefficiency. 526 if (!CallerFI.Metrics.usesDynamicAlloca) 527 return InlineCost::getNever(); 528 } 529 530 // InlineCost - This value measures how good of an inline candidate this call 531 // site is to inline. A lower inline cost make is more likely for the call to 532 // be inlined. This value may go negative due to the fact that bonuses 533 // are negative numbers. 534 // 535 int InlineCost = getInlineSize(CS, Callee) + getInlineBonuses(CS, Callee); 536 return llvm::InlineCost::get(InlineCost); 537 } 538 539 // getSpecializationCost - The heuristic used to determine the code-size 540 // impact of creating a specialized version of Callee with argument 541 // SpecializedArgNo replaced by a constant. 542 InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee, 543 SmallVectorImpl<unsigned> &SpecializedArgNos) 544 { 545 // Don't specialize functions which can be redefined at link-time to mean 546 // something else. 547 if (Callee->mayBeOverridden()) 548 return llvm::InlineCost::getNever(); 549 550 // Get information about the callee. 551 FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; 552 553 // If we haven't calculated this information yet, do so now. 554 if (CalleeFI->Metrics.NumBlocks == 0) 555 CalleeFI->analyzeFunction(Callee, TD); 556 557 int Cost = 0; 558 559 // Look at the original size of the callee. Each instruction counts as 5. 560 Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost; 561 562 // Offset that with the amount of code that can be constant-folded 563 // away with the given arguments replaced by constants. 564 for (SmallVectorImpl<unsigned>::iterator an = SpecializedArgNos.begin(), 565 ae = SpecializedArgNos.end(); an != ae; ++an) 566 Cost -= CalleeFI->ArgumentWeights[*an].ConstantWeight; 567 568 return llvm::InlineCost::get(Cost); 569 } 570 571 // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a 572 // higher threshold to determine if the function call should be inlined. 573 float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) { 574 Function *Callee = CS.getCalledFunction(); 575 576 // Get information about the callee. 577 FunctionInfo &CalleeFI = CachedFunctionInfo[Callee]; 578 579 // If we haven't calculated this information yet, do so now. 580 if (CalleeFI.Metrics.NumBlocks == 0) 581 CalleeFI.analyzeFunction(Callee, TD); 582 583 float Factor = 1.0f; 584 // Single BB functions are often written to be inlined. 585 if (CalleeFI.Metrics.NumBlocks == 1) 586 Factor += 0.5f; 587 588 // Be more aggressive if the function contains a good chunk (if it mades up 589 // at least 10% of the instructions) of vector instructions. 590 if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/2) 591 Factor += 2.0f; 592 else if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/10) 593 Factor += 1.5f; 594 return Factor; 595 } 596 597 /// growCachedCostInfo - update the cached cost info for Caller after Callee has 598 /// been inlined. 599 void 600 InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) { 601 CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics; 602 603 // For small functions we prefer to recalculate the cost for better accuracy. 604 if (CallerMetrics.NumBlocks < 10 && CallerMetrics.NumInsts < 1000) { 605 resetCachedCostInfo(Caller); 606 return; 607 } 608 609 // For large functions, we can save a lot of computation time by skipping 610 // recalculations. 611 if (CallerMetrics.NumCalls > 0) 612 --CallerMetrics.NumCalls; 613 614 if (Callee == 0) return; 615 616 CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics; 617 618 // If we don't have metrics for the callee, don't recalculate them just to 619 // update an approximation in the caller. Instead, just recalculate the 620 // caller info from scratch. 621 if (CalleeMetrics.NumBlocks == 0) { 622 resetCachedCostInfo(Caller); 623 return; 624 } 625 626 // Since CalleeMetrics were already calculated, we know that the CallerMetrics 627 // reference isn't invalidated: both were in the DenseMap. 628 CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca; 629 630 // FIXME: If any of these three are true for the callee, the callee was 631 // not inlined into the caller, so I think they're redundant here. 632 CallerMetrics.callsSetJmp |= CalleeMetrics.callsSetJmp; 633 CallerMetrics.isRecursive |= CalleeMetrics.isRecursive; 634 CallerMetrics.containsIndirectBr |= CalleeMetrics.containsIndirectBr; 635 636 CallerMetrics.NumInsts += CalleeMetrics.NumInsts; 637 CallerMetrics.NumBlocks += CalleeMetrics.NumBlocks; 638 CallerMetrics.NumCalls += CalleeMetrics.NumCalls; 639 CallerMetrics.NumVectorInsts += CalleeMetrics.NumVectorInsts; 640 CallerMetrics.NumRets += CalleeMetrics.NumRets; 641 642 // analyzeBasicBlock counts each function argument as an inst. 643 if (CallerMetrics.NumInsts >= Callee->arg_size()) 644 CallerMetrics.NumInsts -= Callee->arg_size(); 645 else 646 CallerMetrics.NumInsts = 0; 647 648 // We are not updating the argument weights. We have already determined that 649 // Caller is a fairly large function, so we accept the loss of precision. 650 } 651 652 /// clear - empty the cache of inline costs 653 void InlineCostAnalyzer::clear() { 654 CachedFunctionInfo.clear(); 655 } 656