Home | History | Annotate | Download | only in PowerPC
      1 //===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 /// \file
     10 /// This file implements a TargetTransformInfo analysis pass specific to the
     11 /// PPC target machine. It uses the target's detailed information to provide
     12 /// more precise answers to certain TTI queries, while letting the target
     13 /// independent and default TTI implementations handle the rest.
     14 ///
     15 //===----------------------------------------------------------------------===//
     16 
     17 #include "PPC.h"
     18 #include "PPCTargetMachine.h"
     19 #include "llvm/Analysis/TargetTransformInfo.h"
     20 #include "llvm/Support/CommandLine.h"
     21 #include "llvm/Support/Debug.h"
     22 #include "llvm/Target/CostTable.h"
     23 #include "llvm/Target/TargetLowering.h"
     24 using namespace llvm;
     25 
     26 #define DEBUG_TYPE "ppctti"
     27 
     28 static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
     29 cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
     30 
     31 // Declare the pass initialization routine locally as target-specific passes
     32 // don't have a target-wide initialization entry point, and so we rely on the
     33 // pass constructor initialization.
     34 namespace llvm {
     35 void initializePPCTTIPass(PassRegistry &);
     36 }
     37 
     38 namespace {
     39 
     40 class PPCTTI final : public ImmutablePass, public TargetTransformInfo {
     41   const PPCSubtarget *ST;
     42   const PPCTargetLowering *TLI;
     43 
     44 public:
     45   PPCTTI() : ImmutablePass(ID), ST(nullptr), TLI(nullptr) {
     46     llvm_unreachable("This pass cannot be directly constructed");
     47   }
     48 
     49   PPCTTI(const PPCTargetMachine *TM)
     50       : ImmutablePass(ID), ST(TM->getSubtargetImpl()),
     51         TLI(TM->getTargetLowering()) {
     52     initializePPCTTIPass(*PassRegistry::getPassRegistry());
     53   }
     54 
     55   virtual void initializePass() override {
     56     pushTTIStack(this);
     57   }
     58 
     59   virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
     60     TargetTransformInfo::getAnalysisUsage(AU);
     61   }
     62 
     63   /// Pass identification.
     64   static char ID;
     65 
     66   /// Provide necessary pointer adjustments for the two base classes.
     67   virtual void *getAdjustedAnalysisPointer(const void *ID) override {
     68     if (ID == &TargetTransformInfo::ID)
     69       return (TargetTransformInfo*)this;
     70     return this;
     71   }
     72 
     73   /// \name Scalar TTI Implementations
     74   /// @{
     75   unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
     76 
     77   unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
     78                          Type *Ty) const override;
     79   unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
     80                          Type *Ty) const override;
     81 
     82   virtual PopcntSupportKind
     83   getPopcntSupport(unsigned TyWidth) const override;
     84   virtual void getUnrollingPreferences(
     85     Loop *L, UnrollingPreferences &UP) const override;
     86 
     87   /// @}
     88 
     89   /// \name Vector TTI Implementations
     90   /// @{
     91 
     92   virtual unsigned getNumberOfRegisters(bool Vector) const override;
     93   virtual unsigned getRegisterBitWidth(bool Vector) const override;
     94   virtual unsigned getMaximumUnrollFactor() const override;
     95   virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
     96                                           OperandValueKind,
     97                                           OperandValueKind) const override;
     98   virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
     99                                   int Index, Type *SubTp) const override;
    100   virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
    101                                     Type *Src) const override;
    102   virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
    103                                       Type *CondTy) const override;
    104   virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
    105                                       unsigned Index) const override;
    106   virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
    107                                    unsigned Alignment,
    108                                    unsigned AddressSpace) const override;
    109 
    110   /// @}
    111 };
    112 
    113 } // end anonymous namespace
    114 
// Register PPCTTI as an analysis-group implementation of TargetTransformInfo
// under the command-line name "ppctti".
INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti",
                   "PPC Target Transform Info", true, true, false)
char PPCTTI::ID = 0;

/// Factory used by the PPC backend to add this TTI implementation to the
/// pass pipeline.
ImmutablePass *
llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) {
  return new PPCTTI(TM);
}
    123 
    124 
    125 //===----------------------------------------------------------------------===//
    126 //
    127 // PPC cost model.
    128 //
    129 //===----------------------------------------------------------------------===//
    130 
    131 PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
    132   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    133   if (ST->hasPOPCNTD() && TyWidth <= 64)
    134     return PSK_FastHardware;
    135   return PSK_Software;
    136 }
    137 
    138 unsigned PPCTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
    139   if (DisablePPCConstHoist)
    140     return TargetTransformInfo::getIntImmCost(Imm, Ty);
    141 
    142   assert(Ty->isIntegerTy());
    143 
    144   unsigned BitSize = Ty->getPrimitiveSizeInBits();
    145   if (BitSize == 0)
    146     return ~0U;
    147 
    148   if (Imm == 0)
    149     return TCC_Free;
    150 
    151   if (Imm.getBitWidth() <= 64) {
    152     if (isInt<16>(Imm.getSExtValue()))
    153       return TCC_Basic;
    154 
    155     if (isInt<32>(Imm.getSExtValue())) {
    156       // A constant that can be materialized using lis.
    157       if ((Imm.getZExtValue() & 0xFFFF) == 0)
    158         return TCC_Basic;
    159 
    160       return 2 * TCC_Basic;
    161     }
    162   }
    163 
    164   return 4 * TCC_Basic;
    165 }
    166 
    167 unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
    168                                const APInt &Imm, Type *Ty) const {
    169   if (DisablePPCConstHoist)
    170     return TargetTransformInfo::getIntImmCost(IID, Idx, Imm, Ty);
    171 
    172   assert(Ty->isIntegerTy());
    173 
    174   unsigned BitSize = Ty->getPrimitiveSizeInBits();
    175   if (BitSize == 0)
    176     return ~0U;
    177 
    178   switch (IID) {
    179   default: return TCC_Free;
    180   case Intrinsic::sadd_with_overflow:
    181   case Intrinsic::uadd_with_overflow:
    182   case Intrinsic::ssub_with_overflow:
    183   case Intrinsic::usub_with_overflow:
    184     if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
    185       return TCC_Free;
    186     break;
    187   }
    188   return PPCTTI::getIntImmCost(Imm, Ty);
    189 }
    190 
/// Cost of Imm appearing as operand #Idx of an instruction with the given
/// IR opcode. Used by constant hoisting to decide which immediates are
/// worth keeping in registers across uses.
unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                               Type *Ty) const {
  if (DisablePPCConstHoist)
    return TargetTransformInfo::getIntImmCost(Opcode, Idx, Imm, Ty);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  // ImmIdx records which operand position may be encoded as an immediate for
  // this opcode; the flags record which extra immediate forms are free.
  // NOTE: several cases below deliberately fall through.
  unsigned ImmIdx = ~0U;
  bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
       ZeroFree = false;
  switch (Opcode) {
  default: return TCC_Free;
  case Instruction::GetElementPtr:
    // Always hoist the base address of a GetElementPtr. This prevents the
    // creation of new constants for every base constant that gets constant
    // folded with the offset.
    if (Idx == 0)
      return 2 * TCC_Basic;
    return TCC_Free;
  case Instruction::And:
    RunFree = true; // (for the rotate-and-mask instructions)
    // Fallthrough...
  case Instruction::Add:
  case Instruction::Or:
  case Instruction::Xor:
    ShiftedFree = true; // Shifted-immediate forms (e.g. addis/oris) apply.
    // Fallthrough...
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    ImmIdx = 1;
    break;
  case Instruction::ICmp:
    UnsignedFree = true; // Unsigned compares take a 16-bit unsigned immediate.
    ImmIdx = 1;
    // Fallthrough... (zero comparisons can use record-form instructions)
  case Instruction::Select:
    ZeroFree = true;
    break;
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Ret:
  case Instruction::Load:
  case Instruction::Store:
    break;
  }

  if (ZeroFree && Imm == 0)
    return TCC_Free;

  if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
    // Signed 16-bit immediates fit the D-form encodings directly.
    if (isInt<16>(Imm.getSExtValue()))
      return TCC_Free;

    if (RunFree) {
      // Rotate-and-mask instructions can encode any contiguous run of ones
      // (or the complement of one) directly as a mask.
      if (Imm.getBitWidth() <= 32 &&
          (isShiftedMask_32(Imm.getZExtValue()) ||
           isShiftedMask_32(~Imm.getZExtValue())))
        return TCC_Free;


      if (ST->isPPC64() &&
          (isShiftedMask_64(Imm.getZExtValue()) ||
           isShiftedMask_64(~Imm.getZExtValue())))
        return TCC_Free;
    }

    if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
      return TCC_Free;

    // Immediates with a zero low halfword fit the shifted-immediate forms.
    if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
      return TCC_Free;
  }

  // Not encodable in the instruction: charge the full materialization cost.
  return PPCTTI::getIntImmCost(Imm, Ty);
}
    273 
    274 void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
    275   if (ST->getDarwinDirective() == PPC::DIR_A2) {
    276     // The A2 is in-order with a deep pipeline, and concatenation unrolling
    277     // helps expose latency-hiding opportunities to the instruction scheduler.
    278     UP.Partial = UP.Runtime = true;
    279   }
    280 }
    281 
    282 unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
    283   if (Vector && !ST->hasAltivec())
    284     return 0;
    285   return ST->hasVSX() ? 64 : 32;
    286 }
    287 
    288 unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
    289   if (Vector) {
    290     if (ST->hasAltivec()) return 128;
    291     return 0;
    292   }
    293 
    294   if (ST->isPPC64())
    295     return 64;
    296   return 32;
    297 
    298 }
    299 
    300 unsigned PPCTTI::getMaximumUnrollFactor() const {
    301   unsigned Directive = ST->getDarwinDirective();
    302   // The 440 has no SIMD support, but floating-point instructions
    303   // have a 5-cycle latency, so unroll by 5x for latency hiding.
    304   if (Directive == PPC::DIR_440)
    305     return 5;
    306 
    307   // The A2 has no SIMD support, but floating-point instructions
    308   // have a 6-cycle latency, so unroll by 6x for latency hiding.
    309   if (Directive == PPC::DIR_A2)
    310     return 6;
    311 
    312   // FIXME: For lack of any better information, do no harm...
    313   if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
    314     return 1;
    315 
    316   // For most things, modern systems have two execution units (and
    317   // out-of-order execution).
    318   return 2;
    319 }
    320 
    321 unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
    322                                         OperandValueKind Op1Info,
    323                                         OperandValueKind Op2Info) const {
    324   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
    325 
    326   // Fallback to the default implementation.
    327   return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
    328                                                      Op2Info);
    329 }
    330 
    331 unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
    332                                 Type *SubTp) const {
    333   return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
    334 }
    335 
    336 unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
    337   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
    338 
    339   return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
    340 }
    341 
    342 unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
    343                                     Type *CondTy) const {
    344   return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
    345 }
    346 
    347 unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
    348                                     unsigned Index) const {
    349   assert(Val->isVectorTy() && "This must be a vector type");
    350 
    351   int ISD = TLI->InstructionOpcodeToISD(Opcode);
    352   assert(ISD && "Invalid opcode");
    353 
    354   if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
    355     // Double-precision scalars are already located in index #0.
    356     if (Index == 0)
    357       return 0;
    358 
    359     return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
    360   }
    361 
    362   // Estimated cost of a load-hit-store delay.  This was obtained
    363   // experimentally as a minimum needed to prevent unprofitable
    364   // vectorization for the paq8p benchmark.  It may need to be
    365   // raised further if other unprofitable cases remain.
    366   unsigned LHSPenalty = 2;
    367   if (ISD == ISD::INSERT_VECTOR_ELT)
    368     LHSPenalty += 7;
    369 
    370   // Vector element insert/extract with Altivec is very expensive,
    371   // because they require store and reload with the attendant
    372   // processor stall for load-hit-store.  Until VSX is available,
    373   // these need to be estimated as very costly.
    374   if (ISD == ISD::EXTRACT_VECTOR_ELT ||
    375       ISD == ISD::INSERT_VECTOR_ELT)
    376     return LHSPenalty +
    377       TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
    378 
    379   return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
    380 }
    381 
/// Cost of a scalar or vector load/store, including PPC-specific penalties
/// for unaligned accesses.
unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                                 unsigned AddressSpace) const {
  // Legalize the type.
  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Invalid Opcode");

  // Start from the target-independent estimate and add penalties below.
  unsigned Cost =
    TargetTransformInfo::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);

  // VSX loads/stores support unaligned access.
  if (ST->hasVSX()) {
    if (LT.second == MVT::v2f64 || LT.second == MVT::v2i64)
      return Cost;
  }

  // 128-bit Altivec vector loads are exempt from the unaligned penalty:
  // they are expanded via the vector-load + permutation sequence.
  bool UnalignedAltivec =
    Src->isVectorTy() &&
    Src->getPrimitiveSizeInBits() >= LT.second.getSizeInBits() &&
    LT.second.getSizeInBits() == 128 &&
    Opcode == Instruction::Load;

  // PPC in general does not support unaligned loads and stores. They'll need
  // to be decomposed based on the alignment factor.
  unsigned SrcBytes = LT.second.getStoreSize();
  if (SrcBytes && Alignment && Alignment < SrcBytes && !UnalignedAltivec) {
    // One extra legalized access for each aligned chunk beyond the first.
    Cost += LT.first*(SrcBytes/Alignment-1);

    // For a vector type, there is also scalarization overhead (only for
    // stores, loads are expanded using the vector-load + permutation sequence,
    // which is much less expensive).
    if (Src->isVectorTy() && Opcode == Instruction::Store)
      for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
        Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
  }

  return Cost;
}
    420 
    421