Cross Reference: /external/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines Matching refs:VF
236 /// block to a specified vectorization factor (VF).
256         VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()),
323   /// variable canonicalization. It supports both VF = 1 for unrolled loops and
326                            unsigned UF, unsigned VF, PhiVector *PV);
419   unsigned VF;
850   /// \return The most profitable vectorization factor and the cost of that VF.
851   /// This method checks every power of two up to VF. If UserVF is not ZERO
866   /// VF and LoopCost are the selected vectorization factor and the cost of the
867   /// selected VF.
868   unsigned selectUnrollFactor(bool OptForSize, unsigned UserUF, unsigned VF,
890   unsigned expectedCost(unsigned VF);
894   unsigned getInstructionCost(Instruction *I, unsigned VF);
897   /// If the incoming type is void, we return void. If the VF is 1, we return
899   static Type* ToVectorTy(Type *Scalar, unsigned VF);
1285     const LoopVectorizationCostModel::VectorizationFactor VF =
1292         CM.selectUnrollFactor(OptForSize, Hints.getUnroll(), VF.Width, VF.Cost);
1294     DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in "
1298     if (VF.Width == 1) {
1321       InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
1328           Twine("vectorized loop (vectorization factor: ") + Twine(VF.Width) +
1432   Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
1448   // Create a vector of consecutive numbers from zero to VF.
1601   for (unsigned i = 0; i < VF; ++i)
1602     ShuffleMask.push_back(Builder.getInt32(VF - i - 1));
1617   Type *DataTy = VectorType::get(ScalarDataTy, VF);
1626   unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF;
1708       Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
1716         PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
1717         PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
1732     Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
1737       PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
1738       PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
1790     UndefValue::get(VectorType::get(Instr->getType(), VF));
1812     for (unsigned Width = 0; Width < VF; ++Width) {
2142   Constant *Step = ConstantInt::get(IdxTy, VF * UF);
2164   // Now we need to generate the expression for N - (N % VF), which is
2166   Value *R = BypassBuilder.CreateURem(Count, Step, "n.mod.vf");
2395   // If (N - N%VF) == N, then we *don't* need to run the remainder.
2667       if (VF == 1) {
2670         VectorStart = Identity = Builder.CreateVectorSplat(VF,
2679       if (VF == 1) {
2685         Identity = ConstantVector::getSplat(VF, Iden);
2750     if (VF > 1) {
2751       // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
2754       assert(isPowerOf2_32(VF) &&
2757       SmallVector<Constant*, 32> ShuffleMask(VF, nullptr);
2758       for (unsigned i = VF; i != 1; i >>= 1) {
2904                                               unsigned UF, unsigned VF, PhiVector *PV) {
2910       Type *VecTy = (VF == 1) ? PN->getType() :
2911       VectorType::get(PN->getType(), VF);
2988         Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false);
3013           Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part,
3028         if (VF == 1) {
3043         Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
3044         for (unsigned int i = 0; i < VF; ++i) {
3045           int EltIndex = (i + part * VF) * (Reverse ? -1 : 1);
3076       widenPHIInstruction(it, Entry, UF, VF, PV);
3141       Value *ScalarCond = (VF == 1) ? Cond[0] :
3200           Entry[Part] = getConsecutiveVector(Broadcasted, VF * Part, false);
3204       Type *DestTy = (VF == 1) ? CI->getType() :
3205                                  VectorType::get(CI->getType(), VF);
3241           if (VF > 1)
3242             Tys[0] = VectorType::get(CI->getType()->getScalarType(), VF);
4358   for (unsigned vf = 2*TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
4359        vf *= 2) {
4360     if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) {
4361       MaxVFWithoutSLForwardIssues = (vf >>=1);
4496         " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
4907         // iteration, in which case we would loose "VF-1" iterations of the
4913         // before we feed back to the reduction phi. Otherwise, we loose VF-1
5204   unsigned VF = MaxVectorSize;
5215     VF = TC % MaxVectorSize;
5217     if (VF == 0)
5218       VF = MaxVectorSize;
5222     if (VF < 2) {
5229     assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
5230     DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
5244   if (ForceVectorization && VF > 1) {
5249   for (unsigned i=2; i <= VF; i*=2) {
5265   DEBUG(dbgs() << "LV: Selecting VF: "<< Width << ".\n");
5313                                                unsigned VF,
5348   unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1);
5352   if (VF == 1) {
5386   if (VF == 1) {
5394   // If we did not calculate the cost for VF (because the user selected the VF)
5395   // then we calculate the cost of VF here.
5397     LoopCost = expectedCost(VF);
5408   if (VF > 1 && Legal->getReductionVars()->size()) {
5416       (VF == 1 && Legal->getRuntimePointerCheck()->Need);
5559 unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
5574       unsigned C = getInstructionCost(it, VF);
5581       DEBUG(dbgs() << "LV: Found an estimated cost of " << C << " for VF " <<
5582             VF << " For instruction: " << *it << '\n');
5588     if (VF == 1 && Legal->blockNeedsPredication(*bb))
5657 LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
5661     VF = 1;
5664   Type *VectorTy = ToVectorTy(RetTy, VF);
5726       CondTy = VectorType::get(CondTy, VF);
5733     VectorTy = ToVectorTy(ValTy, VF);
5742     VectorTy = ToVectorTy(ValTy, VF);
5751     if (VF == 1)
5759     unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF;
5765       Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
5766       for (unsigned i = 0; i < VF; ++i) {
5778       Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);
5779       Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
5812     Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
5819     Type *RetTy = ToVectorTy(CI->getType(), VF);
5822       Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
5831     if (!RetTy->isVoidTy() && VF != 1) {
5839       Cost += VF * (InsCost + ExtCost * I->getNumOperands());
5842     // The cost of executing VF copies of the scalar instruction. This opcode
5844     Cost += VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy);
5850 Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) {
5851   if (Scalar->isVoidTy() || VF == 1)
5853   return VectorType::get(Scalar, VF);
6015   // When unrolling and the VF is 1, we only need to add a simple scalar.
OpenGrok