//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"

namespace llvm {

/// \brief Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
      : DL(Arg.DL) {}
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
    switch (Opcode) {
    default:
      // By default, just classify everything as 'basic'.
      return TTI::TCC_Basic;

    case Instruction::GetElementPtr:
      llvm_unreachable("Use getGEPCost for GEP operations!");

    case Instruction::BitCast:
      assert(OpTy && "Cast instructions must provide the operand type");
      if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return TTI::TCC_Free;

      // Otherwise, the default basic cost is used.
      return TTI::TCC_Basic;

    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      return TTI::TCC_Expensive;

    case Instruction::IntToPtr: {
      // An inttoptr cast is free so long as the input is a legal integer type
      // that doesn't contain values outside the range of a pointer.
      unsigned OpSize = OpTy->getScalarSizeInBits();
      if (DL.isLegalInteger(OpSize) &&
          OpSize <= DL.getPointerTypeSizeInBits(Ty))
        return TTI::TCC_Free;

      // Otherwise it's not a no-op.
      return TTI::TCC_Basic;
    }
    case Instruction::PtrToInt: {
      // A ptrtoint cast is free so long as the result is a legal integer type
      // that is large enough to hold the pointer.
      unsigned DestSize = Ty->getScalarSizeInBits();
      if (DL.isLegalInteger(DestSize) &&
          DestSize >= DL.getPointerTypeSizeInBits(OpTy))
        return TTI::TCC_Free;

      // Otherwise it's not a no-op.
      return TTI::TCC_Basic;
    }
    case Instruction::Trunc:
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      if (DL.isLegalInteger(DL.getTypeSizeInBits(Ty)))
        return TTI::TCC_Free;

      return TTI::TCC_Basic;
    }
  }
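
  // Worked example of the classification above (illustrative only): on a
  // target with 64-bit pointers where i64 is a legal integer type,
  // `ptrtoint i8* %p to i64` is TTI::TCC_Free, `sdiv i32 %a, %b` is
  // TTI::TCC_Expensive, and an opcode not special-cased here (e.g. `add`)
  // falls back to the default TTI::TCC_Basic.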

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
    for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
      if (!isa<Constant>(Operands[Idx]))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize) {
    JTSize = 0;
    return SI.getNumCases();
  }

  int getExtCost(const Instruction *I, const Value *Src) {
    return TTI::TCC_Basic;
  }

  unsigned getCallCost(FunctionType *FTy, int NumArgs) {
    assert(FTy && "FunctionType must be provided to this routine.");

    // The target-independent implementation just measures the size of the
    // function by approximating that each argument will take on average one
    // instruction to prepare.

    if (NumArgs < 0)
      // Set the argument number to the number of explicit arguments in the
      // function.
      NumArgs = FTy->getNumParams();

    return TTI::TCC_Basic * (NumArgs + 1);
  }

  unsigned getInliningThresholdMultiplier() { return 1; }

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<Type *> ParamTys) {
    switch (IID) {
    default:
      // Intrinsics rarely (if ever) have normal argument setup constraints.
      // Model them as having a basic instruction cost.
      // FIXME: This is wrong for libc intrinsics.
      return TTI::TCC_Basic;

    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_param:
    case Intrinsic::coro_subfn_addr:
      // These intrinsics don't actually represent code after lowering.
      return TTI::TCC_Free;
    }
  }

  bool hasBranchDivergence() { return false; }

  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isAlwaysUniform(const Value *V) { return false; }

  unsigned getFlatAddressSpace() {
    return -1;
  }

  bool isLoweredToCall(const Function *F) {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }
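
  // For example, under the heuristic above a direct call to `sqrtf` or `fabs`
  // is expected to lower to a single selection DAG node and is not treated as
  // a real call, whereas a call to an arbitrary named external function still
  // reports true.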

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &) {}

  bool isLegalAddImmediate(int64_t Imm) { return false; }

  bool isLegalICmpImmediate(int64_t Imm) { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace, Instruction *I = nullptr) {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

  bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  bool isLegalMaskedStore(Type *DataType) { return false; }

  bool isLegalMaskedLoad(Type *DataType) { return false; }

  bool isLegalMaskedScatter(Type *DataType) { return false; }

  bool isLegalMaskedGather(Type *DataType) { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }

  bool prefersVectorizedAddressing() { return true; }

  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                              Scale, AddrSpace))
      return 0;
    return -1;
  }
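
  // Example (illustrative): with the conservative isLegalAddressingMode above,
  // plain reg and reg+reg modes (no base global, zero offset, Scale of 0 or 1)
  // are legal and therefore cost 0 here, while any mode with a base global or
  // a non-zero offset is reported as unsupported (-1).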

  bool LSRWithInstrQueries() { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; }

  bool isProfitableToHoist(Instruction *I) { return true; }

  bool isTypeLegal(Type *Ty) { return false; }

  unsigned getJumpBufAlignment() { return 0; }

  unsigned getJumpBufSize() { return 0; }

  bool shouldBuildLookupTables() { return true; }
  bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }

  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
    return 0;
  }

  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) { return 0; }

  bool supportsEfficientVectorElementLoadStore() { return false; }

  bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }

  bool enableMemCmpExpansion(unsigned &MaxLoadSize) { return false; }

  bool enableInterleavedAccessVectorization() { return false; }

  bool isFPVectorizationPotentiallyUnsafe() { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth,
                                      unsigned AddressSpace,
                                      unsigned Alignment,
                                      bool *Fast) { return false; }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) { return false; }

  unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }

  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                            Type *Ty) {
    return 0;
  }

  unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }

  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                         Type *Ty) {
    return TTI::TCC_Free;
  }

  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                         Type *Ty) {
    return TTI::TCC_Free;
  }

  unsigned getNumberOfRegisters(bool Vector) { return 8; }

  unsigned getRegisterBitWidth(bool Vector) const { return 32; }

  unsigned getMinVectorRegisterBitWidth() { return 128; }

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader) {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() { return 0; }

  llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  llvm::Optional<unsigned> getCacheAssociativity(
      TargetTransformInfo::CacheLevel Level) {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  unsigned getPrefetchDistance() { return 0; }

  unsigned getMinPrefetchStride() { return 1; }

  unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; }

  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }

  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                  TTI::OperandValueKind Opd1Info,
                                  TTI::OperandValueKind Opd2Info,
                                  TTI::OperandValueProperties Opd1PropInfo,
                                  TTI::OperandValueProperties Opd2PropInfo,
                                  ArrayRef<const Value *> Args) {
    return 1;
  }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index,
                          Type *SubTp) {
    return 1;
  }

  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                            const Instruction *I) { return 1; }

  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                    VectorType *VecTy, unsigned Index) {
    return 1;
  }

  unsigned getCFInstrCost(unsigned Opcode) { return 1; }

  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                              const Instruction *I) {
    return 1;
  }

  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
    return 1;
  }

  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                           unsigned AddressSpace, const Instruction *I) {
    return 1;
  }

  unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                                 unsigned AddressSpace) {
    return 1;
  }

  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                                  bool VariableMask,
                                  unsigned Alignment) {
    return 1;
  }

  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                      unsigned Factor,
                                      ArrayRef<unsigned> Indices,
                                      unsigned Alignment,
                                      unsigned AddressSpace) {
    return 1;
  }

  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                 ArrayRef<Type *> Tys, FastMathFlags FMF,
                                 unsigned ScalarizationCostPassed) {
    return 1;
  }
  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                 ArrayRef<Value *> Args, FastMathFlags FMF,
                                 unsigned VF) {
    return 1;
  }

  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
    return 1;
  }

  unsigned getNumberOfParts(Type *Tp) { return 0; }

  unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                     const SCEV *) {
    return 0;
  }

  unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; }

  unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; }

  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h.
    return 0;
  }
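
  // For example (illustrative): an override returning 16 from the hook above
  // implies that runtime lib calls exist for unordered-atomic element sizes
  // of 1, 2, 4, 8 and 16 bytes.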

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) {
    return nullptr;
  }

  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAlign, unsigned DestAlign) const {
    return Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const {
    for (unsigned i = 0; i != RemainingBytes; ++i)
      OpsOut.push_back(Type::getInt8Ty(Context));
  }

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const {
    return true;
  }

protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In case of a vector it returns the min required size for one element.
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // For a vector we need to pick the max of the min required sizes
      // of the elements.
      auto *VT = cast<VectorType>(Val->getType());

      // Assume unsigned elements.
      isSigned = false;

      // The max required size is the total vector width divided by the
      // number of elements in the vector.
      unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getMinSignedBits() - 1;
          // If one element is signed then the whole vector is signed.
          isSigned |= signedElement;
          // Save the max required bit size across all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Not an int constant element.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getMinSignedBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }
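
  // Example (illustrative): for the constant vector <2 x i16> <i16 3, i16 -5>
  // the elements need 2 and 3 value bits respectively, so the helper returns 3
  // with isSigned set to true; for a plain i32 value with no constant or
  // extension information it falls back to the full 32 bits.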

  bool isStridedAccess(const SCEV *Ptr) {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};

/// \brief CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getCallCost;

  unsigned getCallCost(const Function *F, int NumArgs) {
    assert(F && "A concrete function must be provided to this routine.");

    if (NumArgs < 0)
      // Set the argument number to the number of explicit arguments in the
      // function.
      NumArgs = F->arg_size();

    if (Intrinsic::ID IID = F->getIntrinsicID()) {
      FunctionType *FTy = F->getFunctionType();
      SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
      return static_cast<T *>(this)
          ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys);
    }

    if (!static_cast<T *>(this)->isLoweredToCall(F))
      return TTI::TCC_Basic; // Give a basic cost if it will be lowered
                             // directly.

    return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs);
  }

  unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments) {
    // Simply delegate to generic handling of the call.
    // FIXME: We should use instsimplify or something else to catch calls which
    // will constant fold with these arguments.
    return static_cast<T *>(this)->getCallCost(F, Arguments.size());
  }

  using BaseT::getGEPCost;

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) {
    const GlobalValue *BaseGV = nullptr;
    if (Ptr != nullptr) {
      // TODO: will remove this when pointers have an opaque type.
      assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
                 PointeeType &&
             "explicit pointee type doesn't match operand's pointee type");
      BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    }
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP instruction has only a single operand,
    // the base pointer, and therefore TargetType remains a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of a scalar GEP with constant index and the
      // cost of a vector GEP with splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always a splat or scalar constant.
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    // Assumes the address space is 0 when Ptr is nullptr.
    unsigned AS =
        (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace());

    if (static_cast<T *>(this)->isLegalAddressingMode(
            TargetType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale, AS))
      return TTI::TCC_Free;
    return TTI::TCC_Basic;
  }
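
  // Illustrative example: a GEP whose indices are all constants (or splat
  // constants) is folded into a base-plus-constant-offset addressing mode and
  // reported as TTI::TCC_Free when that mode is legal; a GEP with two or more
  // variable indices would need two scale registers, so it is always at least
  // TTI::TCC_Basic.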

  using BaseT::getIntrinsicCost;

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<const Value *> Arguments) {
    // Delegate to the generic intrinsic handling code. This mostly provides an
    // opportunity for targets to (for example) special case the cost of
    // certain intrinsics based on constants used as arguments.
    SmallVector<Type *, 8> ParamTys;
    ParamTys.reserve(Arguments.size());
    for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
      ParamTys.push_back(Arguments[Idx]->getType());
    return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys);
  }

  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) {
    if (isa<PHINode>(U))
      return TTI::TCC_Free; // Model all PHI nodes as free.

    // Static alloca doesn't generate target instructions.
    if (auto *A = dyn_cast<AllocaInst>(U))
      if (A->isStaticAlloca())
        return TTI::TCC_Free;

    if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
      return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
                                                GEP->getPointerOperand(),
                                                Operands.drop_front());
    }

    if (auto CS = ImmutableCallSite(U)) {
      const Function *F = CS.getCalledFunction();
      if (!F) {
        // Just use the called value type.
        Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
        return static_cast<T *>(this)
            ->getCallCost(cast<FunctionType>(FTy), CS.arg_size());
      }

      SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end());
      return static_cast<T *>(this)->getCallCost(F, Arguments);
    }

    if (const CastInst *CI = dyn_cast<CastInst>(U)) {
      // The result of a cmp instruction is often extended (to be used by other
      // cmp instructions, logical or return instructions). These are usually
      // a nop on most sane targets.
      if (isa<CmpInst>(CI->getOperand(0)))
        return TTI::TCC_Free;
      if (isa<SExtInst>(CI) || isa<ZExtInst>(CI) || isa<FPExtInst>(CI))
        return static_cast<T *>(this)->getExtCost(CI, Operands.back());
    }

    return static_cast<T *>(this)->getOperationCost(
        Operator::getOpcode(U), U->getType(),
        U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr);
  }
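
  // Example of the dispatch above (illustrative): PHI nodes and static allocas
  // are free, GEP users are routed to getGEPCost, call sites to getCallCost,
  // and zext/sext/fpext users to getExtCost; everything else falls through to
  // getOperationCost with its opcode and operand type.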

  int getInstructionLatency(const Instruction *I) {
    SmallVector<const Value *, 4> Operands(I->value_op_begin(),
                                           I->value_op_end());
    if (getUserCost(I, Operands) == TTI::TCC_Free)
      return 0;

    if (isa<LoadInst>(I))
      return 4;

    Type *DstTy = I->getType();

    // Usually an intrinsic is a simple instruction.
    // A real function call is much slower.
    if (auto *CI = dyn_cast<CallInst>(I)) {
      const Function *F = CI->getCalledFunction();
      if (!F || static_cast<T *>(this)->isLoweredToCall(F))
        return 40;
      // Some intrinsics return a value and a flag; we use the value type
      // to decide the latency.
      if (StructType *StructTy = dyn_cast<StructType>(DstTy))
        DstTy = StructTy->getElementType(0);
      // Fall through to simple instructions.
    }

    if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
      DstTy = VectorTy->getElementType();
    if (DstTy->isFloatingPointTy())
      return 3;

    return 1;
  }
};

} // end namespace llvm

#endif
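
// A minimal usage sketch (illustrative only, not part of this header): a
// hypothetical target TTI implementation can reuse the CRTP mix-in above and
// override just the hooks it cares about. Names such as MyTargetTTIImpl are
// placeholders.
//
//   class MyTargetTTIImpl final
//       : public TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> {
//     typedef TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> BaseT;
//
//   public:
//     explicit MyTargetTTIImpl(const DataLayout &DL) : BaseT(DL) {}
//
//     // Non-virtual overrides are picked up through the
//     // static_cast<T *>(this) dispatch used throughout the CRTP base.
//     bool isLegalAddImmediate(int64_t Imm) {
//       return Imm >= -4096 && Imm < 4096;
//     }
//   };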