Home | History | Annotate | Download | only in Vectorize

Lines Matching refs:VF

236 /// block to a specified vectorization factor (VF).
256 VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()),
323 /// variable canonicalization. It supports both VF = 1 for unrolled loops and
326 unsigned UF, unsigned VF, PhiVector *PV);
419 unsigned VF;
850 /// \return The most profitable vectorization factor and the cost of that VF.
851 /// This method checks every power of two up to VF. If UserVF is not ZERO
866 /// VF and LoopCost are the selected vectorization factor and the cost of the
867 /// selected VF.
868 unsigned selectUnrollFactor(bool OptForSize, unsigned UserUF, unsigned VF,
890 unsigned expectedCost(unsigned VF);
894 unsigned getInstructionCost(Instruction *I, unsigned VF);
897 /// If the incoming type is void, we return void. If the VF is 1, we return
899 static Type* ToVectorTy(Type *Scalar, unsigned VF);
1285 const LoopVectorizationCostModel::VectorizationFactor VF =
1292 CM.selectUnrollFactor(OptForSize, Hints.getUnroll(), VF.Width, VF.Cost);
1294 DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in "
1298 if (VF.Width == 1) {
1321 InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
1328 Twine("vectorized loop (vectorization factor: ") + Twine(VF.Width) +
1432 Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
1448 // Create a vector of consecutive numbers from zero to VF.
1601 for (unsigned i = 0; i < VF; ++i)
1602 ShuffleMask.push_back(Builder.getInt32(VF - i - 1));
1617 Type *DataTy = VectorType::get(ScalarDataTy, VF);
1626 unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF;
1708 Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
1716 PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
1717 PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
1732 Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
1737 PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
1738 PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
1790 UndefValue::get(VectorType::get(Instr->getType(), VF));
1812 for (unsigned Width = 0; Width < VF; ++Width) {
2142 Constant *Step = ConstantInt::get(IdxTy, VF * UF);
2164 // Now we need to generate the expression for N - (N % VF), which is
2166 Value *R = BypassBuilder.CreateURem(Count, Step, "n.mod.vf");
2395 // If (N - N%VF) == N, then we *don't* need to run the remainder.
2667 if (VF == 1) {
2670 VectorStart = Identity = Builder.CreateVectorSplat(VF,
2679 if (VF == 1) {
2685 Identity = ConstantVector::getSplat(VF, Iden);
2750 if (VF > 1) {
2751 // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
2754 assert(isPowerOf2_32(VF) &&
2757 SmallVector<Constant*, 32> ShuffleMask(VF, nullptr);
2758 for (unsigned i = VF; i != 1; i >>= 1) {
2904 unsigned UF, unsigned VF, PhiVector *PV) {
2910 Type *VecTy = (VF == 1) ? PN->getType() :
2911 VectorType::get(PN->getType(), VF);
2988 Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false);
3013 Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part,
3028 if (VF == 1) {
3043 Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
3044 for (unsigned int i = 0; i < VF; ++i) {
3045 int EltIndex = (i + part * VF) * (Reverse ? -1 : 1);
3076 widenPHIInstruction(it, Entry, UF, VF, PV);
3141 Value *ScalarCond = (VF == 1) ? Cond[0] :
3200 Entry[Part] = getConsecutiveVector(Broadcasted, VF * Part, false);
3204 Type *DestTy = (VF == 1) ? CI->getType() :
3205 VectorType::get(CI->getType(), VF);
3241 if (VF > 1)
3242 Tys[0] = VectorType::get(CI->getType()->getScalarType(), VF);
4358 for (unsigned vf = 2*TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
4359 vf *= 2) {
4360 if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) {
4361 MaxVFWithoutSLForwardIssues = (vf >>=1);
4496 " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
4907 // iteration, in which case we would lose "VF-1" iterations of the
4913 // before we feed back to the reduction phi. Otherwise, we lose VF-1
5204 unsigned VF = MaxVectorSize;
5215 VF = TC % MaxVectorSize;
5217 if (VF == 0)
5218 VF = MaxVectorSize;
5222 if (VF < 2) {
5229 assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
5230 DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
5244 if (ForceVectorization && VF > 1) {
5249 for (unsigned i=2; i <= VF; i*=2) {
5265 DEBUG(dbgs() << "LV: Selecting VF: "<< Width << ".\n");
5313 unsigned VF,
5348 unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1);
5352 if (VF == 1) {
5386 if (VF == 1) {
5394 // If we did not calculate the cost for VF (because the user selected the VF)
5395 // then we calculate the cost of VF here.
5397 LoopCost = expectedCost(VF);
5408 if (VF > 1 && Legal->getReductionVars()->size()) {
5416 (VF == 1 && Legal->getRuntimePointerCheck()->Need);
5559 unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
5574 unsigned C = getInstructionCost(it, VF);
5581 DEBUG(dbgs() << "LV: Found an estimated cost of " << C << " for VF " <<
5582 VF << " For instruction: " << *it << '\n');
5588 if (VF == 1 && Legal->blockNeedsPredication(*bb))
5657 LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
5661 VF = 1;
5664 Type *VectorTy = ToVectorTy(RetTy, VF);
5726 CondTy = VectorType::get(CondTy, VF);
5733 VectorTy = ToVectorTy(ValTy, VF);
5742 VectorTy = ToVectorTy(ValTy, VF);
5751 if (VF == 1)
5759 unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF;
5765 Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
5766 for (unsigned i = 0; i < VF; ++i) {
5778 Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);
5779 Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
5812 Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
5819 Type *RetTy = ToVectorTy(CI->getType(), VF);
5822 Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
5831 if (!RetTy->isVoidTy() && VF != 1) {
5839 Cost += VF * (InsCost + ExtCost * I->getNumOperands());
5842 // The cost of executing VF copies of the scalar instruction. This opcode
5844 Cost += VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy);
5850 Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) {
5851 if (Scalar->isVoidTy() || VF == 1)
5853 return VectorType::get(Scalar, VF);
6015 // When unrolling and the VF is 1, we only need to add a simple scalar.