Home | History | Annotate | Download | only in InstCombine
      1 //===- InstCombineVectorOps.cpp -------------------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements instcombine for ExtractElement, InsertElement and
     11 // ShuffleVector.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "InstCombineInternal.h"
     16 #include "llvm/ADT/DenseMap.h"
     17 #include "llvm/Analysis/InstructionSimplify.h"
     18 #include "llvm/Analysis/VectorUtils.h"
     19 #include "llvm/IR/PatternMatch.h"
     20 using namespace llvm;
     21 using namespace PatternMatch;
     22 
     23 #define DEBUG_TYPE "instcombine"
     24 
     25 /// Return true if the value is cheaper to scalarize than it is to leave as a
     26 /// vector operation. isConstant indicates whether we're extracting one known
     27 /// element. If false we're extracting a variable index.
     28 static bool cheapToScalarize(Value *V, bool isConstant) {
     29   if (Constant *C = dyn_cast<Constant>(V)) {
     30     if (isConstant) return true;
     31 
     32     // If all elts are the same, we can extract it and use any of the values.
     33     if (Constant *Op0 = C->getAggregateElement(0U)) {
     34       for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e;
     35            ++i)
     36         if (C->getAggregateElement(i) != Op0)
     37           return false;
     38       return true;
     39     }
     40   }
     41   Instruction *I = dyn_cast<Instruction>(V);
     42   if (!I) return false;
     43 
     44   // Insert element gets simplified to the inserted element or is deleted if
     45   // this is constant idx extract element and its a constant idx insertelt.
     46   if (I->getOpcode() == Instruction::InsertElement && isConstant &&
     47       isa<ConstantInt>(I->getOperand(2)))
     48     return true;
     49   if (I->getOpcode() == Instruction::Load && I->hasOneUse())
     50     return true;
     51   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
     52     if (BO->hasOneUse() &&
     53         (cheapToScalarize(BO->getOperand(0), isConstant) ||
     54          cheapToScalarize(BO->getOperand(1), isConstant)))
     55       return true;
     56   if (CmpInst *CI = dyn_cast<CmpInst>(I))
     57     if (CI->hasOneUse() &&
     58         (cheapToScalarize(CI->getOperand(0), isConstant) ||
     59          cheapToScalarize(CI->getOperand(1), isConstant)))
     60       return true;
     61 
     62   return false;
     63 }
     64 
     65 // If we have a PHI node with a vector type that is only used to feed
     66 // itself and be an operand of extractelement at a constant location,
     67 // try to replace the PHI of the vector type with a PHI of a scalar type.
     68 Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
     69   SmallVector<Instruction *, 2> Extracts;
     70   // The users we want the PHI to have are:
     71   // 1) The EI ExtractElement (we already know this)
     72   // 2) Possibly more ExtractElements with the same index.
     73   // 3) Another operand, which will feed back into the PHI.
     74   Instruction *PHIUser = nullptr;
     75   for (auto U : PN->users()) {
     76     if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
     77       if (EI.getIndexOperand() == EU->getIndexOperand())
     78         Extracts.push_back(EU);
     79       else
     80         return nullptr;
     81     } else if (!PHIUser) {
     82       PHIUser = cast<Instruction>(U);
     83     } else {
     84       return nullptr;
     85     }
     86   }
     87 
     88   if (!PHIUser)
     89     return nullptr;
     90 
     91   // Verify that this PHI user has one use, which is the PHI itself,
     92   // and that it is a binary operation which is cheap to scalarize.
     93   // otherwise return NULL.
     94   if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
     95       !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true))
     96     return nullptr;
     97 
     98   // Create a scalar PHI node that will replace the vector PHI node
     99   // just before the current PHI node.
    100   PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
    101       PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
    102   // Scalarize each PHI operand.
    103   for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
    104     Value *PHIInVal = PN->getIncomingValue(i);
    105     BasicBlock *inBB = PN->getIncomingBlock(i);
    106     Value *Elt = EI.getIndexOperand();
    107     // If the operand is the PHI induction variable:
    108     if (PHIInVal == PHIUser) {
    109       // Scalarize the binary operation. Its first operand is the
    110       // scalar PHI, and the second operand is extracted from the other
    111       // vector operand.
    112       BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
    113       unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
    114       Value *Op = InsertNewInstWith(
    115           ExtractElementInst::Create(B0->getOperand(opId), Elt,
    116                                      B0->getOperand(opId)->getName() + ".Elt"),
    117           *B0);
    118       Value *newPHIUser = InsertNewInstWith(
    119           BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
    120                                                 scalarPHI, Op, B0), *B0);
    121       scalarPHI->addIncoming(newPHIUser, inBB);
    122     } else {
    123       // Scalarize PHI input:
    124       Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
    125       // Insert the new instruction into the predecessor basic block.
    126       Instruction *pos = dyn_cast<Instruction>(PHIInVal);
    127       BasicBlock::iterator InsertPos;
    128       if (pos && !isa<PHINode>(pos)) {
    129         InsertPos = ++pos->getIterator();
    130       } else {
    131         InsertPos = inBB->getFirstInsertionPt();
    132       }
    133 
    134       InsertNewInstWith(newEI, *InsertPos);
    135 
    136       scalarPHI->addIncoming(newEI, inBB);
    137     }
    138   }
    139 
    140   for (auto E : Extracts)
    141     replaceInstUsesWith(*E, scalarPHI);
    142 
    143   return &EI;
    144 }
    145 
    146 Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
    147   if (Value *V = SimplifyExtractElementInst(
    148           EI.getVectorOperand(), EI.getIndexOperand(), DL, TLI, DT, AC))
    149     return replaceInstUsesWith(EI, V);
    150 
    151   // If vector val is constant with all elements the same, replace EI with
    152   // that element.  We handle a known element # below.
    153   if (Constant *C = dyn_cast<Constant>(EI.getOperand(0)))
    154     if (cheapToScalarize(C, false))
    155       return replaceInstUsesWith(EI, C->getAggregateElement(0U));
    156 
    157   // If extracting a specified index from the vector, see if we can recursively
    158   // find a previously computed scalar that was inserted into the vector.
    159   if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
    160     unsigned IndexVal = IdxC->getZExtValue();
    161     unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
    162 
    163     // InstSimplify handles cases where the index is invalid.
    164     assert(IndexVal < VectorWidth);
    165 
    166     // This instruction only demands the single element from the input vector.
    167     // If the input vector has a single use, simplify it based on this use
    168     // property.
    169     if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
    170       APInt UndefElts(VectorWidth, 0);
    171       APInt DemandedMask(VectorWidth, 0);
    172       DemandedMask.setBit(IndexVal);
    173       if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask,
    174                                                 UndefElts)) {
    175         EI.setOperand(0, V);
    176         return &EI;
    177       }
    178     }
    179 
    180     // If this extractelement is directly using a bitcast from a vector of
    181     // the same number of elements, see if we can find the source element from
    182     // it.  In this case, we will end up needing to bitcast the scalars.
    183     if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
    184       if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
    185         if (VT->getNumElements() == VectorWidth)
    186           if (Value *Elt = findScalarElement(BCI->getOperand(0), IndexVal))
    187             return new BitCastInst(Elt, EI.getType());
    188     }
    189 
    190     // If there's a vector PHI feeding a scalar use through this extractelement
    191     // instruction, try to scalarize the PHI.
    192     if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) {
    193       Instruction *scalarPHI = scalarizePHI(EI, PN);
    194       if (scalarPHI)
    195         return scalarPHI;
    196     }
    197   }
    198 
    199   if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
    200     // Push extractelement into predecessor operation if legal and
    201     // profitable to do so.
    202     if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
    203       if (I->hasOneUse() &&
    204           cheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
    205         Value *newEI0 =
    206           Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
    207                                         EI.getName()+".lhs");
    208         Value *newEI1 =
    209           Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
    210                                         EI.getName()+".rhs");
    211         return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(),
    212                                                      newEI0, newEI1, BO);
    213       }
    214     } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
    215       // Extracting the inserted element?
    216       if (IE->getOperand(2) == EI.getOperand(1))
    217         return replaceInstUsesWith(EI, IE->getOperand(1));
    218       // If the inserted and extracted elements are constants, they must not
    219       // be the same value, extract from the pre-inserted value instead.
    220       if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
    221         Worklist.AddValue(EI.getOperand(0));
    222         EI.setOperand(0, IE->getOperand(0));
    223         return &EI;
    224       }
    225     } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
    226       // If this is extracting an element from a shufflevector, figure out where
    227       // it came from and extract from the appropriate input element instead.
    228       if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
    229         int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
    230         Value *Src;
    231         unsigned LHSWidth =
    232           SVI->getOperand(0)->getType()->getVectorNumElements();
    233 
    234         if (SrcIdx < 0)
    235           return replaceInstUsesWith(EI, UndefValue::get(EI.getType()));
    236         if (SrcIdx < (int)LHSWidth)
    237           Src = SVI->getOperand(0);
    238         else {
    239           SrcIdx -= LHSWidth;
    240           Src = SVI->getOperand(1);
    241         }
    242         Type *Int32Ty = Type::getInt32Ty(EI.getContext());
    243         return ExtractElementInst::Create(Src,
    244                                           ConstantInt::get(Int32Ty,
    245                                                            SrcIdx, false));
    246       }
    247     } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
    248       // Canonicalize extractelement(cast) -> cast(extractelement).
    249       // Bitcasts can change the number of vector elements, and they cost
    250       // nothing.
    251       if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
    252         Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
    253                                                   EI.getIndexOperand());
    254         Worklist.AddValue(EE);
    255         return CastInst::Create(CI->getOpcode(), EE, EI.getType());
    256       }
    257     } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
    258       if (SI->hasOneUse()) {
    259         // TODO: For a select on vectors, it might be useful to do this if it
    260         // has multiple extractelement uses. For vector select, that seems to
    261         // fight the vectorizer.
    262 
    263         // If we are extracting an element from a vector select or a select on
    264         // vectors, create a select on the scalars extracted from the vector
    265         // arguments.
    266         Value *TrueVal = SI->getTrueValue();
    267         Value *FalseVal = SI->getFalseValue();
    268 
    269         Value *Cond = SI->getCondition();
    270         if (Cond->getType()->isVectorTy()) {
    271           Cond = Builder->CreateExtractElement(Cond,
    272                                                EI.getIndexOperand(),
    273                                                Cond->getName() + ".elt");
    274         }
    275 
    276         Value *V1Elem
    277           = Builder->CreateExtractElement(TrueVal,
    278                                           EI.getIndexOperand(),
    279                                           TrueVal->getName() + ".elt");
    280 
    281         Value *V2Elem
    282           = Builder->CreateExtractElement(FalseVal,
    283                                           EI.getIndexOperand(),
    284                                           FalseVal->getName() + ".elt");
    285         return SelectInst::Create(Cond,
    286                                   V1Elem,
    287                                   V2Elem,
    288                                   SI->getName() + ".elt");
    289       }
    290     }
    291   }
    292   return nullptr;
    293 }
    294 
    295 /// If V is a shuffle of values that ONLY returns elements from either LHS or
    296 /// RHS, return the shuffle mask and true. Otherwise, return false.
    297 static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
    298                                          SmallVectorImpl<Constant*> &Mask) {
    299   assert(LHS->getType() == RHS->getType() &&
    300          "Invalid CollectSingleShuffleElements");
    301   unsigned NumElts = V->getType()->getVectorNumElements();
    302 
    303   if (isa<UndefValue>(V)) {
    304     Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
    305     return true;
    306   }
    307 
    308   if (V == LHS) {
    309     for (unsigned i = 0; i != NumElts; ++i)
    310       Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
    311     return true;
    312   }
    313 
    314   if (V == RHS) {
    315     for (unsigned i = 0; i != NumElts; ++i)
    316       Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
    317                                       i+NumElts));
    318     return true;
    319   }
    320 
    321   if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    322     // If this is an insert of an extract from some other vector, include it.
    323     Value *VecOp    = IEI->getOperand(0);
    324     Value *ScalarOp = IEI->getOperand(1);
    325     Value *IdxOp    = IEI->getOperand(2);
    326 
    327     if (!isa<ConstantInt>(IdxOp))
    328       return false;
    329     unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
    330 
    331     if (isa<UndefValue>(ScalarOp)) {  // inserting undef into vector.
    332       // We can handle this if the vector we are inserting into is
    333       // transitively ok.
    334       if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
    335         // If so, update the mask to reflect the inserted undef.
    336         Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
    337         return true;
    338       }
    339     } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
    340       if (isa<ConstantInt>(EI->getOperand(1))) {
    341         unsigned ExtractedIdx =
    342         cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
    343         unsigned NumLHSElts = LHS->getType()->getVectorNumElements();
    344 
    345         // This must be extracting from either LHS or RHS.
    346         if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
    347           // We can handle this if the vector we are inserting into is
    348           // transitively ok.
    349           if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
    350             // If so, update the mask to reflect the inserted value.
    351             if (EI->getOperand(0) == LHS) {
    352               Mask[InsertedIdx % NumElts] =
    353               ConstantInt::get(Type::getInt32Ty(V->getContext()),
    354                                ExtractedIdx);
    355             } else {
    356               assert(EI->getOperand(0) == RHS);
    357               Mask[InsertedIdx % NumElts] =
    358               ConstantInt::get(Type::getInt32Ty(V->getContext()),
    359                                ExtractedIdx + NumLHSElts);
    360             }
    361             return true;
    362           }
    363         }
    364       }
    365     }
    366   }
    367 
    368   return false;
    369 }
    370 
    371 /// If we have insertion into a vector that is wider than the vector that we
    372 /// are extracting from, try to widen the source vector to allow a single
    373 /// shufflevector to replace one or more insert/extract pairs.
    374 static void replaceExtractElements(InsertElementInst *InsElt,
    375                                    ExtractElementInst *ExtElt,
    376                                    InstCombiner &IC) {
    377   VectorType *InsVecType = InsElt->getType();
    378   VectorType *ExtVecType = ExtElt->getVectorOperandType();
    379   unsigned NumInsElts = InsVecType->getVectorNumElements();
    380   unsigned NumExtElts = ExtVecType->getVectorNumElements();
    381 
    382   // The inserted-to vector must be wider than the extracted-from vector.
    383   if (InsVecType->getElementType() != ExtVecType->getElementType() ||
    384       NumExtElts >= NumInsElts)
    385     return;
    386 
    387   // Create a shuffle mask to widen the extended-from vector using undefined
    388   // values. The mask selects all of the values of the original vector followed
    389   // by as many undefined values as needed to create a vector of the same length
    390   // as the inserted-to vector.
    391   SmallVector<Constant *, 16> ExtendMask;
    392   IntegerType *IntType = Type::getInt32Ty(InsElt->getContext());
    393   for (unsigned i = 0; i < NumExtElts; ++i)
    394     ExtendMask.push_back(ConstantInt::get(IntType, i));
    395   for (unsigned i = NumExtElts; i < NumInsElts; ++i)
    396     ExtendMask.push_back(UndefValue::get(IntType));
    397 
    398   Value *ExtVecOp = ExtElt->getVectorOperand();
    399   auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
    400   BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
    401                                    ? ExtVecOpInst->getParent()
    402                                    : ExtElt->getParent();
    403 
    404   // TODO: This restriction matches the basic block check below when creating
    405   // new extractelement instructions. If that limitation is removed, this one
    406   // could also be removed. But for now, we just bail out to ensure that we
    407   // will replace the extractelement instruction that is feeding our
    408   // insertelement instruction. This allows the insertelement to then be
    409   // replaced by a shufflevector. If the insertelement is not replaced, we can
    410   // induce infinite looping because there's an optimization for extractelement
    411   // that will delete our widening shuffle. This would trigger another attempt
    412   // here to create that shuffle, and we spin forever.
    413   if (InsertionBlock != InsElt->getParent())
    414     return;
    415 
    416   auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType),
    417                                         ConstantVector::get(ExtendMask));
    418 
    419   // Insert the new shuffle after the vector operand of the extract is defined
    420   // (as long as it's not a PHI) or at the start of the basic block of the
    421   // extract, so any subsequent extracts in the same basic block can use it.
    422   // TODO: Insert before the earliest ExtractElementInst that is replaced.
    423   if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
    424     WideVec->insertAfter(ExtVecOpInst);
    425   else
    426     IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
    427 
    428   // Replace extracts from the original narrow vector with extracts from the new
    429   // wide vector.
    430   for (User *U : ExtVecOp->users()) {
    431     ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
    432     if (!OldExt || OldExt->getParent() != WideVec->getParent())
    433       continue;
    434     auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
    435     NewExt->insertAfter(WideVec);
    436     IC.replaceInstUsesWith(*OldExt, NewExt);
    437   }
    438 }
    439 
    440 /// We are building a shuffle to create V, which is a sequence of insertelement,
    441 /// extractelement pairs. If PermittedRHS is set, then we must either use it or
    442 /// not rely on the second vector source. Return a std::pair containing the
    443 /// left and right vectors of the proposed shuffle (or 0), and set the Mask
    444 /// parameter as required.
    445 ///
    446 /// Note: we intentionally don't try to fold earlier shuffles since they have
    447 /// often been chosen carefully to be efficiently implementable on the target.
    448 typedef std::pair<Value *, Value *> ShuffleOps;
    449 
    450 static ShuffleOps collectShuffleElements(Value *V,
    451                                          SmallVectorImpl<Constant *> &Mask,
    452                                          Value *PermittedRHS,
    453                                          InstCombiner &IC) {
    454   assert(V->getType()->isVectorTy() && "Invalid shuffle!");
    455   unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
    456 
    457   if (isa<UndefValue>(V)) {
    458     Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
    459     return std::make_pair(
    460         PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
    461   }
    462 
    463   if (isa<ConstantAggregateZero>(V)) {
    464     Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
    465     return std::make_pair(V, nullptr);
    466   }
    467 
    468   if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    469     // If this is an insert of an extract from some other vector, include it.
    470     Value *VecOp    = IEI->getOperand(0);
    471     Value *ScalarOp = IEI->getOperand(1);
    472     Value *IdxOp    = IEI->getOperand(2);
    473 
    474     if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
    475       if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
    476         unsigned ExtractedIdx =
    477           cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
    478         unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
    479 
    480         // Either the extracted from or inserted into vector must be RHSVec,
    481         // otherwise we'd end up with a shuffle of three inputs.
    482         if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
    483           Value *RHS = EI->getOperand(0);
    484           ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
    485           assert(LR.second == nullptr || LR.second == RHS);
    486 
    487           if (LR.first->getType() != RHS->getType()) {
    488             // Although we are giving up for now, see if we can create extracts
    489             // that match the inserts for another round of combining.
    490             replaceExtractElements(IEI, EI, IC);
    491 
    492             // We tried our best, but we can't find anything compatible with RHS
    493             // further up the chain. Return a trivial shuffle.
    494             for (unsigned i = 0; i < NumElts; ++i)
    495               Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i);
    496             return std::make_pair(V, nullptr);
    497           }
    498 
    499           unsigned NumLHSElts = RHS->getType()->getVectorNumElements();
    500           Mask[InsertedIdx % NumElts] =
    501             ConstantInt::get(Type::getInt32Ty(V->getContext()),
    502                              NumLHSElts+ExtractedIdx);
    503           return std::make_pair(LR.first, RHS);
    504         }
    505 
    506         if (VecOp == PermittedRHS) {
    507           // We've gone as far as we can: anything on the other side of the
    508           // extractelement will already have been converted into a shuffle.
    509           unsigned NumLHSElts =
    510               EI->getOperand(0)->getType()->getVectorNumElements();
    511           for (unsigned i = 0; i != NumElts; ++i)
    512             Mask.push_back(ConstantInt::get(
    513                 Type::getInt32Ty(V->getContext()),
    514                 i == InsertedIdx ? ExtractedIdx : NumLHSElts + i));
    515           return std::make_pair(EI->getOperand(0), PermittedRHS);
    516         }
    517 
    518         // If this insertelement is a chain that comes from exactly these two
    519         // vectors, return the vector and the effective shuffle.
    520         if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
    521             collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
    522                                          Mask))
    523           return std::make_pair(EI->getOperand(0), PermittedRHS);
    524       }
    525     }
    526   }
    527 
    528   // Otherwise, we can't do anything fancy. Return an identity vector.
    529   for (unsigned i = 0; i != NumElts; ++i)
    530     Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
    531   return std::make_pair(V, nullptr);
    532 }
    533 
    534 /// Try to find redundant insertvalue instructions, like the following ones:
    535 ///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0
    536 ///  %1 = insertvalue { i8, i32 } %0,    i8 %y, 0
    537 /// Here the second instruction inserts values at the same indices, as the
    538 /// first one, making the first one redundant.
    539 /// It should be transformed to:
    540 ///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0
    541 Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) {
    542   bool IsRedundant = false;
    543   ArrayRef<unsigned int> FirstIndices = I.getIndices();
    544 
    545   // If there is a chain of insertvalue instructions (each of them except the
    546   // last one has only one use and it's another insertvalue insn from this
    547   // chain), check if any of the 'children' uses the same indices as the first
    548   // instruction. In this case, the first one is redundant.
    549   Value *V = &I;
    550   unsigned Depth = 0;
    551   while (V->hasOneUse() && Depth < 10) {
    552     User *U = V->user_back();
    553     auto UserInsInst = dyn_cast<InsertValueInst>(U);
    554     if (!UserInsInst || U->getOperand(0) != V)
    555       break;
    556     if (UserInsInst->getIndices() == FirstIndices) {
    557       IsRedundant = true;
    558       break;
    559     }
    560     V = UserInsInst;
    561     Depth++;
    562   }
    563 
    564   if (IsRedundant)
    565     return replaceInstUsesWith(I, I.getOperand(0));
    566   return nullptr;
    567 }
    568 
    569 Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
    570   Value *VecOp    = IE.getOperand(0);
    571   Value *ScalarOp = IE.getOperand(1);
    572   Value *IdxOp    = IE.getOperand(2);
    573 
    574   // Inserting an undef or into an undefined place, remove this.
    575   if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
    576     replaceInstUsesWith(IE, VecOp);
    577 
    578   // If the inserted element was extracted from some other vector, and if the
    579   // indexes are constant, try to turn this into a shufflevector operation.
    580   if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
    581     if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
    582       unsigned NumInsertVectorElts = IE.getType()->getNumElements();
    583       unsigned NumExtractVectorElts =
    584           EI->getOperand(0)->getType()->getVectorNumElements();
    585       unsigned ExtractedIdx =
    586         cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
    587       unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
    588 
    589       if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract.
    590         return replaceInstUsesWith(IE, VecOp);
    591 
    592       if (InsertedIdx >= NumInsertVectorElts)  // Out of range insert.
    593         return replaceInstUsesWith(IE, UndefValue::get(IE.getType()));
    594 
    595       // If we are extracting a value from a vector, then inserting it right
    596       // back into the same place, just use the input vector.
    597       if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
    598         return replaceInstUsesWith(IE, VecOp);
    599 
    600       // If this insertelement isn't used by some other insertelement, turn it
    601       // (and any insertelements it points to), into one big shuffle.
    602       if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) {
    603         SmallVector<Constant*, 16> Mask;
    604         ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
    605 
    606         // The proposed shuffle may be trivial, in which case we shouldn't
    607         // perform the combine.
    608         if (LR.first != &IE && LR.second != &IE) {
    609           // We now have a shuffle of LHS, RHS, Mask.
    610           if (LR.second == nullptr)
    611             LR.second = UndefValue::get(LR.first->getType());
    612           return new ShuffleVectorInst(LR.first, LR.second,
    613                                        ConstantVector::get(Mask));
    614         }
    615       }
    616     }
    617   }
    618 
    619   unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
    620   APInt UndefElts(VWidth, 0);
    621   APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
    622   if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
    623     if (V != &IE)
    624       return replaceInstUsesWith(IE, V);
    625     return &IE;
    626   }
    627 
    628   return nullptr;
    629 }
    630 
    631 /// Return true if we can evaluate the specified expression tree if the vector
    632 /// elements were shuffled in a different order.
    633 static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
    634                                 unsigned Depth = 5) {
    635   // We can always reorder the elements of a constant.
    636   if (isa<Constant>(V))
    637     return true;
    638 
    639   // We won't reorder vector arguments. No IPO here.
    640   Instruction *I = dyn_cast<Instruction>(V);
    641   if (!I) return false;
    642 
    643   // Two users may expect different orders of the elements. Don't try it.
    644   if (!I->hasOneUse())
    645     return false;
    646 
    647   if (Depth == 0) return false;
    648 
    649   switch (I->getOpcode()) {
    650     case Instruction::Add:
    651     case Instruction::FAdd:
    652     case Instruction::Sub:
    653     case Instruction::FSub:
    654     case Instruction::Mul:
    655     case Instruction::FMul:
    656     case Instruction::UDiv:
    657     case Instruction::SDiv:
    658     case Instruction::FDiv:
    659     case Instruction::URem:
    660     case Instruction::SRem:
    661     case Instruction::FRem:
    662     case Instruction::Shl:
    663     case Instruction::LShr:
    664     case Instruction::AShr:
    665     case Instruction::And:
    666     case Instruction::Or:
    667     case Instruction::Xor:
    668     case Instruction::ICmp:
    669     case Instruction::FCmp:
    670     case Instruction::Trunc:
    671     case Instruction::ZExt:
    672     case Instruction::SExt:
    673     case Instruction::FPToUI:
    674     case Instruction::FPToSI:
    675     case Instruction::UIToFP:
    676     case Instruction::SIToFP:
    677     case Instruction::FPTrunc:
    678     case Instruction::FPExt:
    679     case Instruction::GetElementPtr: {
    680       for (Value *Operand : I->operands()) {
    681         if (!CanEvaluateShuffled(Operand, Mask, Depth-1))
    682           return false;
    683       }
    684       return true;
    685     }
    686     case Instruction::InsertElement: {
    687       ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
    688       if (!CI) return false;
    689       int ElementNumber = CI->getLimitedValue();
    690 
    691       // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
    692       // can't put an element into multiple indices.
    693       bool SeenOnce = false;
    694       for (int i = 0, e = Mask.size(); i != e; ++i) {
    695         if (Mask[i] == ElementNumber) {
    696           if (SeenOnce)
    697             return false;
    698           SeenOnce = true;
    699         }
    700       }
    701       return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1);
    702     }
    703   }
    704   return false;
    705 }
    706 
    707 /// Rebuild a new instruction just like 'I' but with the new operands given.
    708 /// In the event of type mismatch, the type of the operands is correct.
    709 static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
    710   // We don't want to use the IRBuilder here because we want the replacement
    711   // instructions to appear next to 'I', not the builder's insertion point.
    712   switch (I->getOpcode()) {
    713     case Instruction::Add:
    714     case Instruction::FAdd:
    715     case Instruction::Sub:
    716     case Instruction::FSub:
    717     case Instruction::Mul:
    718     case Instruction::FMul:
    719     case Instruction::UDiv:
    720     case Instruction::SDiv:
    721     case Instruction::FDiv:
    722     case Instruction::URem:
    723     case Instruction::SRem:
    724     case Instruction::FRem:
    725     case Instruction::Shl:
    726     case Instruction::LShr:
    727     case Instruction::AShr:
    728     case Instruction::And:
    729     case Instruction::Or:
    730     case Instruction::Xor: {
    731       BinaryOperator *BO = cast<BinaryOperator>(I);
    732       assert(NewOps.size() == 2 && "binary operator with #ops != 2");
    733       BinaryOperator *New =
    734           BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
    735                                  NewOps[0], NewOps[1], "", BO);
    736       if (isa<OverflowingBinaryOperator>(BO)) {
    737         New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
    738         New->setHasNoSignedWrap(BO->hasNoSignedWrap());
    739       }
    740       if (isa<PossiblyExactOperator>(BO)) {
    741         New->setIsExact(BO->isExact());
    742       }
    743       if (isa<FPMathOperator>(BO))
    744         New->copyFastMathFlags(I);
    745       return New;
    746     }
    747     case Instruction::ICmp:
    748       assert(NewOps.size() == 2 && "icmp with #ops != 2");
    749       return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
    750                           NewOps[0], NewOps[1]);
    751     case Instruction::FCmp:
    752       assert(NewOps.size() == 2 && "fcmp with #ops != 2");
    753       return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
    754                           NewOps[0], NewOps[1]);
    755     case Instruction::Trunc:
    756     case Instruction::ZExt:
    757     case Instruction::SExt:
    758     case Instruction::FPToUI:
    759     case Instruction::FPToSI:
    760     case Instruction::UIToFP:
    761     case Instruction::SIToFP:
    762     case Instruction::FPTrunc:
    763     case Instruction::FPExt: {
    764       // It's possible that the mask has a different number of elements from
    765       // the original cast. We recompute the destination type to match the mask.
    766       Type *DestTy =
    767           VectorType::get(I->getType()->getScalarType(),
    768                           NewOps[0]->getType()->getVectorNumElements());
    769       assert(NewOps.size() == 1 && "cast with #ops != 1");
    770       return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
    771                               "", I);
    772     }
    773     case Instruction::GetElementPtr: {
    774       Value *Ptr = NewOps[0];
    775       ArrayRef<Value*> Idx = NewOps.slice(1);
    776       GetElementPtrInst *GEP = GetElementPtrInst::Create(
    777           cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
    778       GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
    779       return GEP;
    780     }
    781   }
    782   llvm_unreachable("failed to rebuild vector instructions");
    783 }
    784 
    785 Value *
    786 InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
    787   // Mask.size() does not need to be equal to the number of vector elements.
    788 
    789   assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
    790   if (isa<UndefValue>(V)) {
    791     return UndefValue::get(VectorType::get(V->getType()->getScalarType(),
    792                                            Mask.size()));
    793   }
    794   if (isa<ConstantAggregateZero>(V)) {
    795     return ConstantAggregateZero::get(
    796                VectorType::get(V->getType()->getScalarType(),
    797                                Mask.size()));
    798   }
    799   if (Constant *C = dyn_cast<Constant>(V)) {
    800     SmallVector<Constant *, 16> MaskValues;
    801     for (int i = 0, e = Mask.size(); i != e; ++i) {
    802       if (Mask[i] == -1)
    803         MaskValues.push_back(UndefValue::get(Builder->getInt32Ty()));
    804       else
    805         MaskValues.push_back(Builder->getInt32(Mask[i]));
    806     }
    807     return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
    808                                           ConstantVector::get(MaskValues));
    809   }
    810 
    811   Instruction *I = cast<Instruction>(V);
    812   switch (I->getOpcode()) {
    813     case Instruction::Add:
    814     case Instruction::FAdd:
    815     case Instruction::Sub:
    816     case Instruction::FSub:
    817     case Instruction::Mul:
    818     case Instruction::FMul:
    819     case Instruction::UDiv:
    820     case Instruction::SDiv:
    821     case Instruction::FDiv:
    822     case Instruction::URem:
    823     case Instruction::SRem:
    824     case Instruction::FRem:
    825     case Instruction::Shl:
    826     case Instruction::LShr:
    827     case Instruction::AShr:
    828     case Instruction::And:
    829     case Instruction::Or:
    830     case Instruction::Xor:
    831     case Instruction::ICmp:
    832     case Instruction::FCmp:
    833     case Instruction::Trunc:
    834     case Instruction::ZExt:
    835     case Instruction::SExt:
    836     case Instruction::FPToUI:
    837     case Instruction::FPToSI:
    838     case Instruction::UIToFP:
    839     case Instruction::SIToFP:
    840     case Instruction::FPTrunc:
    841     case Instruction::FPExt:
    842     case Instruction::Select:
    843     case Instruction::GetElementPtr: {
    844       SmallVector<Value*, 8> NewOps;
    845       bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
    846       for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
    847         Value *V = EvaluateInDifferentElementOrder(I->getOperand(i), Mask);
    848         NewOps.push_back(V);
    849         NeedsRebuild |= (V != I->getOperand(i));
    850       }
    851       if (NeedsRebuild) {
    852         return buildNew(I, NewOps);
    853       }
    854       return I;
    855     }
    856     case Instruction::InsertElement: {
    857       int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
    858 
    859       // The insertelement was inserting at Element. Figure out which element
    860       // that becomes after shuffling. The answer is guaranteed to be unique
    861       // by CanEvaluateShuffled.
    862       bool Found = false;
    863       int Index = 0;
    864       for (int e = Mask.size(); Index != e; ++Index) {
    865         if (Mask[Index] == Element) {
    866           Found = true;
    867           break;
    868         }
    869       }
    870 
    871       // If element is not in Mask, no need to handle the operand 1 (element to
    872       // be inserted). Just evaluate values in operand 0 according to Mask.
    873       if (!Found)
    874         return EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
    875 
    876       Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
    877       return InsertElementInst::Create(V, I->getOperand(1),
    878                                        Builder->getInt32(Index), "", I);
    879     }
    880   }
    881   llvm_unreachable("failed to reorder elements of vector instruction!");
    882 }
    883 
    884 static void recognizeIdentityMask(const SmallVectorImpl<int> &Mask,
    885                                   bool &isLHSID, bool &isRHSID) {
    886   isLHSID = isRHSID = true;
    887 
    888   for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
    889     if (Mask[i] < 0) continue;  // Ignore undef values.
    890     // Is this an identity shuffle of the LHS value?
    891     isLHSID &= (Mask[i] == (int)i);
    892 
    893     // Is this an identity shuffle of the RHS value?
    894     isRHSID &= (Mask[i]-e == i);
    895   }
    896 }
    897 
    898 // Returns true if the shuffle is extracting a contiguous range of values from
    899 // LHS, for example:
    900 //                 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
    901 //   Input:        |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
    902 //   Shuffles to:  |EE|FF|GG|HH|
    903 //                 +--+--+--+--+
    904 static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
    905                                        SmallVector<int, 16> &Mask) {
    906   unsigned LHSElems =
    907       cast<VectorType>(SVI.getOperand(0)->getType())->getNumElements();
    908   unsigned MaskElems = Mask.size();
    909   unsigned BegIdx = Mask.front();
    910   unsigned EndIdx = Mask.back();
    911   if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
    912     return false;
    913   for (unsigned I = 0; I != MaskElems; ++I)
    914     if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
    915       return false;
    916   return true;
    917 }
    918 
    919 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
    920   Value *LHS = SVI.getOperand(0);
    921   Value *RHS = SVI.getOperand(1);
    922   SmallVector<int, 16> Mask = SVI.getShuffleMask();
    923   Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
    924 
    925   bool MadeChange = false;
    926 
    927   // Undefined shuffle mask -> undefined value.
    928   if (isa<UndefValue>(SVI.getOperand(2)))
    929     return replaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
    930 
    931   unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
    932 
    933   APInt UndefElts(VWidth, 0);
    934   APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
    935   if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
    936     if (V != &SVI)
    937       return replaceInstUsesWith(SVI, V);
    938     LHS = SVI.getOperand(0);
    939     RHS = SVI.getOperand(1);
    940     MadeChange = true;
    941   }
    942 
    943   unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements();
    944 
    945   // Canonicalize shuffle(x    ,x,mask) -> shuffle(x, undef,mask')
    946   // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
    947   if (LHS == RHS || isa<UndefValue>(LHS)) {
    948     if (isa<UndefValue>(LHS) && LHS == RHS) {
    949       // shuffle(undef,undef,mask) -> undef.
    950       Value *Result = (VWidth == LHSWidth)
    951                       ? LHS : UndefValue::get(SVI.getType());
    952       return replaceInstUsesWith(SVI, Result);
    953     }
    954 
    955     // Remap any references to RHS to use LHS.
    956     SmallVector<Constant*, 16> Elts;
    957     for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
    958       if (Mask[i] < 0) {
    959         Elts.push_back(UndefValue::get(Int32Ty));
    960         continue;
    961       }
    962 
    963       if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
    964           (Mask[i] <  (int)e && isa<UndefValue>(LHS))) {
    965         Mask[i] = -1;     // Turn into undef.
    966         Elts.push_back(UndefValue::get(Int32Ty));
    967       } else {
    968         Mask[i] = Mask[i] % e;  // Force to LHS.
    969         Elts.push_back(ConstantInt::get(Int32Ty, Mask[i]));
    970       }
    971     }
    972     SVI.setOperand(0, SVI.getOperand(1));
    973     SVI.setOperand(1, UndefValue::get(RHS->getType()));
    974     SVI.setOperand(2, ConstantVector::get(Elts));
    975     LHS = SVI.getOperand(0);
    976     RHS = SVI.getOperand(1);
    977     MadeChange = true;
    978   }
    979 
    980   if (VWidth == LHSWidth) {
    981     // Analyze the shuffle, are the LHS or RHS and identity shuffles?
    982     bool isLHSID, isRHSID;
    983     recognizeIdentityMask(Mask, isLHSID, isRHSID);
    984 
    985     // Eliminate identity shuffles.
    986     if (isLHSID) return replaceInstUsesWith(SVI, LHS);
    987     if (isRHSID) return replaceInstUsesWith(SVI, RHS);
    988   }
    989 
    990   if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) {
    991     Value *V = EvaluateInDifferentElementOrder(LHS, Mask);
    992     return replaceInstUsesWith(SVI, V);
    993   }
    994 
    995   // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
    996   // a non-vector type. We can instead bitcast the original vector followed by
    997   // an extract of the desired element:
    998   //
    999   //   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
   1000   //                         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1001   //   %1 = bitcast <4 x i8> %sroa to i32
   1002   // Becomes:
   1003   //   %bc = bitcast <16 x i8> %in to <4 x i32>
   1004   //   %ext = extractelement <4 x i32> %bc, i32 0
   1005   //
   1006   // If the shuffle is extracting a contiguous range of values from the input
   1007   // vector then each use which is a bitcast of the extracted size can be
   1008   // replaced. This will work if the vector types are compatible, and the begin
   1009   // index is aligned to a value in the casted vector type. If the begin index
   1010   // isn't aligned then we can shuffle the original vector (keeping the same
   1011   // vector type) before extracting.
   1012   //
   1013   // This code will bail out if the target type is fundamentally incompatible
   1014   // with vectors of the source type.
   1015   //
   1016   // Example of <16 x i8>, target type i32:
   1017   // Index range [4,8):         v-----------v Will work.
   1018   //                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
   1019   //     <16 x i8>: |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |
   1020   //     <4 x i32>: |           |           |           |           |
   1021   //                +-----------+-----------+-----------+-----------+
   1022   // Index range [6,10):              ^-----------^ Needs an extra shuffle.
   1023   // Target type i40:           ^--------------^ Won't work, bail.
   1024   if (isShuffleExtractingFromLHS(SVI, Mask)) {
   1025     Value *V = LHS;
   1026     unsigned MaskElems = Mask.size();
   1027     unsigned BegIdx = Mask.front();
   1028     VectorType *SrcTy = cast<VectorType>(V->getType());
   1029     unsigned VecBitWidth = SrcTy->getBitWidth();
   1030     unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
   1031     assert(SrcElemBitWidth && "vector elements must have a bitwidth");
   1032     unsigned SrcNumElems = SrcTy->getNumElements();
   1033     SmallVector<BitCastInst *, 8> BCs;
   1034     DenseMap<Type *, Value *> NewBCs;
   1035     for (User *U : SVI.users())
   1036       if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
   1037         if (!BC->use_empty())
   1038           // Only visit bitcasts that weren't previously handled.
   1039           BCs.push_back(BC);
   1040     for (BitCastInst *BC : BCs) {
   1041       Type *TgtTy = BC->getDestTy();
   1042       unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
   1043       if (!TgtElemBitWidth)
   1044         continue;
   1045       unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
   1046       bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
   1047       bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
   1048       if (!VecBitWidthsEqual)
   1049         continue;
   1050       if (!VectorType::isValidElementType(TgtTy))
   1051         continue;
   1052       VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems);
   1053       if (!BegIsAligned) {
   1054         // Shuffle the input so [0,NumElements) contains the output, and
   1055         // [NumElems,SrcNumElems) is undef.
   1056         SmallVector<Constant *, 16> ShuffleMask(SrcNumElems,
   1057                                                 UndefValue::get(Int32Ty));
   1058         for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
   1059           ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
   1060         V = Builder->CreateShuffleVector(V, UndefValue::get(V->getType()),
   1061                                          ConstantVector::get(ShuffleMask),
   1062                                          SVI.getName() + ".extract");
   1063         BegIdx = 0;
   1064       }
   1065       unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
   1066       assert(SrcElemsPerTgtElem);
   1067       BegIdx /= SrcElemsPerTgtElem;
   1068       bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
   1069       auto *NewBC =
   1070           BCAlreadyExists
   1071               ? NewBCs[CastSrcTy]
   1072               : Builder->CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
   1073       if (!BCAlreadyExists)
   1074         NewBCs[CastSrcTy] = NewBC;
   1075       auto *Ext = Builder->CreateExtractElement(
   1076           NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
   1077       // The shufflevector isn't being replaced: the bitcast that used it
   1078       // is. InstCombine will visit the newly-created instructions.
   1079       replaceInstUsesWith(*BC, Ext);
   1080       MadeChange = true;
   1081     }
   1082   }
   1083 
   1084   // If the LHS is a shufflevector itself, see if we can combine it with this
   1085   // one without producing an unusual shuffle.
   1086   // Cases that might be simplified:
   1087   // 1.
   1088   // x1=shuffle(v1,v2,mask1)
   1089   //  x=shuffle(x1,undef,mask)
   1090   //        ==>
   1091   //  x=shuffle(v1,undef,newMask)
   1092   // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
   1093   // 2.
   1094   // x1=shuffle(v1,undef,mask1)
   1095   //  x=shuffle(x1,x2,mask)
   1096   // where v1.size() == mask1.size()
   1097   //        ==>
   1098   //  x=shuffle(v1,x2,newMask)
   1099   // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
   1100   // 3.
   1101   // x2=shuffle(v2,undef,mask2)
   1102   //  x=shuffle(x1,x2,mask)
   1103   // where v2.size() == mask2.size()
   1104   //        ==>
   1105   //  x=shuffle(x1,v2,newMask)
   1106   // newMask[i] = (mask[i] < x1.size())
   1107   //              ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
   1108   // 4.
   1109   // x1=shuffle(v1,undef,mask1)
   1110   // x2=shuffle(v2,undef,mask2)
   1111   //  x=shuffle(x1,x2,mask)
   1112   // where v1.size() == v2.size()
   1113   //        ==>
   1114   //  x=shuffle(v1,v2,newMask)
   1115   // newMask[i] = (mask[i] < x1.size())
   1116   //              ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
   1117   //
   1118   // Here we are really conservative:
   1119   // we are absolutely afraid of producing a shuffle mask not in the input
   1120   // program, because the code gen may not be smart enough to turn a merged
   1121   // shuffle into two specific shuffles: it may produce worse code.  As such,
   1122   // we only merge two shuffles if the result is either a splat or one of the
   1123   // input shuffle masks.  In this case, merging the shuffles just removes
   1124   // one instruction, which we know is safe.  This is good for things like
   1125   // turning: (splat(splat)) -> splat, or
   1126   // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
   1127   ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
   1128   ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
   1129   if (LHSShuffle)
   1130     if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
   1131       LHSShuffle = nullptr;
   1132   if (RHSShuffle)
   1133     if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
   1134       RHSShuffle = nullptr;
   1135   if (!LHSShuffle && !RHSShuffle)
   1136     return MadeChange ? &SVI : nullptr;
   1137 
   1138   Value* LHSOp0 = nullptr;
   1139   Value* LHSOp1 = nullptr;
   1140   Value* RHSOp0 = nullptr;
   1141   unsigned LHSOp0Width = 0;
   1142   unsigned RHSOp0Width = 0;
   1143   if (LHSShuffle) {
   1144     LHSOp0 = LHSShuffle->getOperand(0);
   1145     LHSOp1 = LHSShuffle->getOperand(1);
   1146     LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements();
   1147   }
   1148   if (RHSShuffle) {
   1149     RHSOp0 = RHSShuffle->getOperand(0);
   1150     RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements();
   1151   }
   1152   Value* newLHS = LHS;
   1153   Value* newRHS = RHS;
   1154   if (LHSShuffle) {
   1155     // case 1
   1156     if (isa<UndefValue>(RHS)) {
   1157       newLHS = LHSOp0;
   1158       newRHS = LHSOp1;
   1159     }
   1160     // case 2 or 4
   1161     else if (LHSOp0Width == LHSWidth) {
   1162       newLHS = LHSOp0;
   1163     }
   1164   }
   1165   // case 3 or 4
   1166   if (RHSShuffle && RHSOp0Width == LHSWidth) {
   1167     newRHS = RHSOp0;
   1168   }
   1169   // case 4
   1170   if (LHSOp0 == RHSOp0) {
   1171     newLHS = LHSOp0;
   1172     newRHS = nullptr;
   1173   }
   1174 
   1175   if (newLHS == LHS && newRHS == RHS)
   1176     return MadeChange ? &SVI : nullptr;
   1177 
   1178   SmallVector<int, 16> LHSMask;
   1179   SmallVector<int, 16> RHSMask;
   1180   if (newLHS != LHS)
   1181     LHSMask = LHSShuffle->getShuffleMask();
   1182   if (RHSShuffle && newRHS != RHS)
   1183     RHSMask = RHSShuffle->getShuffleMask();
   1184 
   1185   unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
   1186   SmallVector<int, 16> newMask;
   1187   bool isSplat = true;
   1188   int SplatElt = -1;
   1189   // Create a new mask for the new ShuffleVectorInst so that the new
   1190   // ShuffleVectorInst is equivalent to the original one.
   1191   for (unsigned i = 0; i < VWidth; ++i) {
   1192     int eltMask;
   1193     if (Mask[i] < 0) {
   1194       // This element is an undef value.
   1195       eltMask = -1;
   1196     } else if (Mask[i] < (int)LHSWidth) {
   1197       // This element is from left hand side vector operand.
   1198       //
   1199       // If LHS is going to be replaced (case 1, 2, or 4), calculate the
   1200       // new mask value for the element.
   1201       if (newLHS != LHS) {
   1202         eltMask = LHSMask[Mask[i]];
   1203         // If the value selected is an undef value, explicitly specify it
   1204         // with a -1 mask value.
   1205         if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
   1206           eltMask = -1;
   1207       } else
   1208         eltMask = Mask[i];
   1209     } else {
   1210       // This element is from right hand side vector operand
   1211       //
   1212       // If the value selected is an undef value, explicitly specify it
   1213       // with a -1 mask value. (case 1)
   1214       if (isa<UndefValue>(RHS))
   1215         eltMask = -1;
   1216       // If RHS is going to be replaced (case 3 or 4), calculate the
   1217       // new mask value for the element.
   1218       else if (newRHS != RHS) {
   1219         eltMask = RHSMask[Mask[i]-LHSWidth];
   1220         // If the value selected is an undef value, explicitly specify it
   1221         // with a -1 mask value.
   1222         if (eltMask >= (int)RHSOp0Width) {
   1223           assert(isa<UndefValue>(RHSShuffle->getOperand(1))
   1224                  && "should have been check above");
   1225           eltMask = -1;
   1226         }
   1227       } else
   1228         eltMask = Mask[i]-LHSWidth;
   1229 
   1230       // If LHS's width is changed, shift the mask value accordingly.
   1231       // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any
   1232       // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
   1233       // If newRHS == newLHS, we want to remap any references from newRHS to
   1234       // newLHS so that we can properly identify splats that may occur due to
   1235       // obfuscation across the two vectors.
   1236       if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
   1237         eltMask += newLHSWidth;
   1238     }
   1239 
   1240     // Check if this could still be a splat.
   1241     if (eltMask >= 0) {
   1242       if (SplatElt >= 0 && SplatElt != eltMask)
   1243         isSplat = false;
   1244       SplatElt = eltMask;
   1245     }
   1246 
   1247     newMask.push_back(eltMask);
   1248   }
   1249 
   1250   // If the result mask is equal to one of the original shuffle masks,
   1251   // or is a splat, do the replacement.
   1252   if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
   1253     SmallVector<Constant*, 16> Elts;
   1254     for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
   1255       if (newMask[i] < 0) {
   1256         Elts.push_back(UndefValue::get(Int32Ty));
   1257       } else {
   1258         Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
   1259       }
   1260     }
   1261     if (!newRHS)
   1262       newRHS = UndefValue::get(newLHS->getType());
   1263     return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
   1264   }
   1265 
   1266   // If the result mask is an identity, replace uses of this instruction with
   1267   // corresponding argument.
   1268   bool isLHSID, isRHSID;
   1269   recognizeIdentityMask(newMask, isLHSID, isRHSID);
   1270   if (isLHSID && VWidth == LHSOp0Width) return replaceInstUsesWith(SVI, newLHS);
   1271   if (isRHSID && VWidth == RHSOp0Width) return replaceInstUsesWith(SVI, newRHS);
   1272 
   1273   return MadeChange ? &SVI : nullptr;
   1274 }
   1275