Home | History | Annotate | Download | only in InstCombine
      1 //===- InstCombineVectorOps.cpp -------------------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements instcombine for ExtractElement, InsertElement and
     11 // ShuffleVector.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "InstCombine.h"
     16 using namespace llvm;
     17 
     18 /// CheapToScalarize - Return true if the value is cheaper to scalarize than it
     19 /// is to leave as a vector operation.  isConstant indicates whether we're
     20 /// extracting one known element.  If false we're extracting a variable index.
     21 static bool CheapToScalarize(Value *V, bool isConstant) {
     22   if (Constant *C = dyn_cast<Constant>(V)) {
     23     if (isConstant) return true;
     24 
     25     // If all elts are the same, we can extract it and use any of the values.
     26     Constant *Op0 = C->getAggregateElement(0U);
     27     for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; ++i)
     28       if (C->getAggregateElement(i) != Op0)
     29         return false;
     30     return true;
     31   }
     32   Instruction *I = dyn_cast<Instruction>(V);
     33   if (!I) return false;
     34 
     35   // Insert element gets simplified to the inserted element or is deleted if
     36   // this is constant idx extract element and its a constant idx insertelt.
     37   if (I->getOpcode() == Instruction::InsertElement && isConstant &&
     38       isa<ConstantInt>(I->getOperand(2)))
     39     return true;
     40   if (I->getOpcode() == Instruction::Load && I->hasOneUse())
     41     return true;
     42   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
     43     if (BO->hasOneUse() &&
     44         (CheapToScalarize(BO->getOperand(0), isConstant) ||
     45          CheapToScalarize(BO->getOperand(1), isConstant)))
     46       return true;
     47   if (CmpInst *CI = dyn_cast<CmpInst>(I))
     48     if (CI->hasOneUse() &&
     49         (CheapToScalarize(CI->getOperand(0), isConstant) ||
     50          CheapToScalarize(CI->getOperand(1), isConstant)))
     51       return true;
     52 
     53   return false;
     54 }
     55 
     56 /// FindScalarElement - Given a vector and an element number, see if the scalar
     57 /// value is already around as a register, for example if it were inserted then
     58 /// extracted from the vector.
     59 static Value *FindScalarElement(Value *V, unsigned EltNo) {
     60   assert(V->getType()->isVectorTy() && "Not looking at a vector?");
     61   VectorType *VTy = cast<VectorType>(V->getType());
     62   unsigned Width = VTy->getNumElements();
     63   if (EltNo >= Width)  // Out of range access.
     64     return UndefValue::get(VTy->getElementType());
     65 
     66   if (Constant *C = dyn_cast<Constant>(V))
     67     return C->getAggregateElement(EltNo);
     68 
     69   if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
     70     // If this is an insert to a variable element, we don't know what it is.
     71     if (!isa<ConstantInt>(III->getOperand(2)))
     72       return 0;
     73     unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
     74 
     75     // If this is an insert to the element we are looking for, return the
     76     // inserted value.
     77     if (EltNo == IIElt)
     78       return III->getOperand(1);
     79 
     80     // Otherwise, the insertelement doesn't modify the value, recurse on its
     81     // vector input.
     82     return FindScalarElement(III->getOperand(0), EltNo);
     83   }
     84 
     85   if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
     86     unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
     87     int InEl = SVI->getMaskValue(EltNo);
     88     if (InEl < 0)
     89       return UndefValue::get(VTy->getElementType());
     90     if (InEl < (int)LHSWidth)
     91       return FindScalarElement(SVI->getOperand(0), InEl);
     92     return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
     93   }
     94 
     95   // Otherwise, we don't know.
     96   return 0;
     97 }
     98 
     99 Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
    100   // If vector val is constant with all elements the same, replace EI with
    101   // that element.  We handle a known element # below.
    102   if (Constant *C = dyn_cast<Constant>(EI.getOperand(0)))
    103     if (CheapToScalarize(C, false))
    104       return ReplaceInstUsesWith(EI, C->getAggregateElement(0U));
    105 
    106   // If extracting a specified index from the vector, see if we can recursively
    107   // find a previously computed scalar that was inserted into the vector.
    108   if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
    109     unsigned IndexVal = IdxC->getZExtValue();
    110     unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
    111 
    112     // If this is extracting an invalid index, turn this into undef, to avoid
    113     // crashing the code below.
    114     if (IndexVal >= VectorWidth)
    115       return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
    116 
    117     // This instruction only demands the single element from the input vector.
    118     // If the input vector has a single use, simplify it based on this use
    119     // property.
    120     if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
    121       APInt UndefElts(VectorWidth, 0);
    122       APInt DemandedMask(VectorWidth, 0);
    123       DemandedMask.setBit(IndexVal);
    124       if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
    125                                                 DemandedMask, UndefElts)) {
    126         EI.setOperand(0, V);
    127         return &EI;
    128       }
    129     }
    130 
    131     if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
    132       return ReplaceInstUsesWith(EI, Elt);
    133 
    134     // If the this extractelement is directly using a bitcast from a vector of
    135     // the same number of elements, see if we can find the source element from
    136     // it.  In this case, we will end up needing to bitcast the scalars.
    137     if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
    138       if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
    139         if (VT->getNumElements() == VectorWidth)
    140           if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
    141             return new BitCastInst(Elt, EI.getType());
    142     }
    143   }
    144 
    145   if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
    146     // Push extractelement into predecessor operation if legal and
    147     // profitable to do so
    148     if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
    149       if (I->hasOneUse() &&
    150           CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
    151         Value *newEI0 =
    152           Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
    153                                         EI.getName()+".lhs");
    154         Value *newEI1 =
    155           Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
    156                                         EI.getName()+".rhs");
    157         return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
    158       }
    159     } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
    160       // Extracting the inserted element?
    161       if (IE->getOperand(2) == EI.getOperand(1))
    162         return ReplaceInstUsesWith(EI, IE->getOperand(1));
    163       // If the inserted and extracted elements are constants, they must not
    164       // be the same value, extract from the pre-inserted value instead.
    165       if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
    166         Worklist.AddValue(EI.getOperand(0));
    167         EI.setOperand(0, IE->getOperand(0));
    168         return &EI;
    169       }
    170     } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
    171       // If this is extracting an element from a shufflevector, figure out where
    172       // it came from and extract from the appropriate input element instead.
    173       if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
    174         int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
    175         Value *Src;
    176         unsigned LHSWidth =
    177           SVI->getOperand(0)->getType()->getVectorNumElements();
    178 
    179         if (SrcIdx < 0)
    180           return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
    181         if (SrcIdx < (int)LHSWidth)
    182           Src = SVI->getOperand(0);
    183         else {
    184           SrcIdx -= LHSWidth;
    185           Src = SVI->getOperand(1);
    186         }
    187         Type *Int32Ty = Type::getInt32Ty(EI.getContext());
    188         return ExtractElementInst::Create(Src,
    189                                           ConstantInt::get(Int32Ty,
    190                                                            SrcIdx, false));
    191       }
    192     } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
    193       // Canonicalize extractelement(cast) -> cast(extractelement)
    194       // bitcasts can change the number of vector elements and they cost nothing
    195       if (CI->hasOneUse() && EI.hasOneUse() &&
    196           (CI->getOpcode() != Instruction::BitCast)) {
    197         Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
    198                                                   EI.getIndexOperand());
    199         return CastInst::Create(CI->getOpcode(), EE, EI.getType());
    200       }
    201     }
    202   }
    203   return 0;
    204 }
    205 
    206 /// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
    207 /// elements from either LHS or RHS, return the shuffle mask and true.
    208 /// Otherwise, return false.
    209 static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
    210                                          SmallVectorImpl<Constant*> &Mask) {
    211   assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
    212          "Invalid CollectSingleShuffleElements");
    213   unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
    214 
    215   if (isa<UndefValue>(V)) {
    216     Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
    217     return true;
    218   }
    219 
    220   if (V == LHS) {
    221     for (unsigned i = 0; i != NumElts; ++i)
    222       Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
    223     return true;
    224   }
    225 
    226   if (V == RHS) {
    227     for (unsigned i = 0; i != NumElts; ++i)
    228       Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
    229                                       i+NumElts));
    230     return true;
    231   }
    232 
    233   if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    234     // If this is an insert of an extract from some other vector, include it.
    235     Value *VecOp    = IEI->getOperand(0);
    236     Value *ScalarOp = IEI->getOperand(1);
    237     Value *IdxOp    = IEI->getOperand(2);
    238 
    239     if (!isa<ConstantInt>(IdxOp))
    240       return false;
    241     unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
    242 
    243     if (isa<UndefValue>(ScalarOp)) {  // inserting undef into vector.
    244       // Okay, we can handle this if the vector we are insertinting into is
    245       // transitively ok.
    246       if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
    247         // If so, update the mask to reflect the inserted undef.
    248         Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
    249         return true;
    250       }
    251     } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
    252       if (isa<ConstantInt>(EI->getOperand(1)) &&
    253           EI->getOperand(0)->getType() == V->getType()) {
    254         unsigned ExtractedIdx =
    255         cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
    256 
    257         // This must be extracting from either LHS or RHS.
    258         if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
    259           // Okay, we can handle this if the vector we are insertinting into is
    260           // transitively ok.
    261           if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
    262             // If so, update the mask to reflect the inserted value.
    263             if (EI->getOperand(0) == LHS) {
    264               Mask[InsertedIdx % NumElts] =
    265               ConstantInt::get(Type::getInt32Ty(V->getContext()),
    266                                ExtractedIdx);
    267             } else {
    268               assert(EI->getOperand(0) == RHS);
    269               Mask[InsertedIdx % NumElts] =
    270               ConstantInt::get(Type::getInt32Ty(V->getContext()),
    271                                ExtractedIdx+NumElts);
    272             }
    273             return true;
    274           }
    275         }
    276       }
    277     }
    278   }
    279   // TODO: Handle shufflevector here!
    280 
    281   return false;
    282 }
    283 
    284 /// CollectShuffleElements - We are building a shuffle of V, using RHS as the
    285 /// RHS of the shuffle instruction, if it is not null.  Return a shuffle mask
    286 /// that computes V and the LHS value of the shuffle.
    287 static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
    288                                      Value *&RHS) {
    289   assert(V->getType()->isVectorTy() &&
    290          (RHS == 0 || V->getType() == RHS->getType()) &&
    291          "Invalid shuffle!");
    292   unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
    293 
    294   if (isa<UndefValue>(V)) {
    295     Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
    296     return V;
    297   }
    298 
    299   if (isa<ConstantAggregateZero>(V)) {
    300     Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
    301     return V;
    302   }
    303 
    304   if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    305     // If this is an insert of an extract from some other vector, include it.
    306     Value *VecOp    = IEI->getOperand(0);
    307     Value *ScalarOp = IEI->getOperand(1);
    308     Value *IdxOp    = IEI->getOperand(2);
    309 
    310     if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
    311       if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
    312           EI->getOperand(0)->getType() == V->getType()) {
    313         unsigned ExtractedIdx =
    314           cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
    315         unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
    316 
    317         // Either the extracted from or inserted into vector must be RHSVec,
    318         // otherwise we'd end up with a shuffle of three inputs.
    319         if (EI->getOperand(0) == RHS || RHS == 0) {
    320           RHS = EI->getOperand(0);
    321           Value *V = CollectShuffleElements(VecOp, Mask, RHS);
    322           Mask[InsertedIdx % NumElts] =
    323             ConstantInt::get(Type::getInt32Ty(V->getContext()),
    324                              NumElts+ExtractedIdx);
    325           return V;
    326         }
    327 
    328         if (VecOp == RHS) {
    329           Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
    330           // Everything but the extracted element is replaced with the RHS.
    331           for (unsigned i = 0; i != NumElts; ++i) {
    332             if (i != InsertedIdx)
    333               Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()),
    334                                          NumElts+i);
    335           }
    336           return V;
    337         }
    338 
    339         // If this insertelement is a chain that comes from exactly these two
    340         // vectors, return the vector and the effective shuffle.
    341         if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
    342           return EI->getOperand(0);
    343       }
    344     }
    345   }
    346   // TODO: Handle shufflevector here!
    347 
    348   // Otherwise, can't do anything fancy.  Return an identity vector.
    349   for (unsigned i = 0; i != NumElts; ++i)
    350     Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
    351   return V;
    352 }
    353 
    354 Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
    355   Value *VecOp    = IE.getOperand(0);
    356   Value *ScalarOp = IE.getOperand(1);
    357   Value *IdxOp    = IE.getOperand(2);
    358 
    359   // Inserting an undef or into an undefined place, remove this.
    360   if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
    361     ReplaceInstUsesWith(IE, VecOp);
    362 
    363   // If the inserted element was extracted from some other vector, and if the
    364   // indexes are constant, try to turn this into a shufflevector operation.
    365   if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
    366     if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
    367         EI->getOperand(0)->getType() == IE.getType()) {
    368       unsigned NumVectorElts = IE.getType()->getNumElements();
    369       unsigned ExtractedIdx =
    370         cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
    371       unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
    372 
    373       if (ExtractedIdx >= NumVectorElts) // Out of range extract.
    374         return ReplaceInstUsesWith(IE, VecOp);
    375 
    376       if (InsertedIdx >= NumVectorElts)  // Out of range insert.
    377         return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
    378 
    379       // If we are extracting a value from a vector, then inserting it right
    380       // back into the same place, just use the input vector.
    381       if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
    382         return ReplaceInstUsesWith(IE, VecOp);
    383 
    384       // If this insertelement isn't used by some other insertelement, turn it
    385       // (and any insertelements it points to), into one big shuffle.
    386       if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
    387         SmallVector<Constant*, 16> Mask;
    388         Value *RHS = 0;
    389         Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
    390         if (RHS == 0) RHS = UndefValue::get(LHS->getType());
    391         // We now have a shuffle of LHS, RHS, Mask.
    392         return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask));
    393       }
    394     }
    395   }
    396 
    397   unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
    398   APInt UndefElts(VWidth, 0);
    399   APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
    400   if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
    401     if (V != &IE)
    402       return ReplaceInstUsesWith(IE, V);
    403     return &IE;
    404   }
    405 
    406   return 0;
    407 }
    408 
    409 
    410 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
    411   Value *LHS = SVI.getOperand(0);
    412   Value *RHS = SVI.getOperand(1);
    413   SmallVector<int, 16> Mask = SVI.getShuffleMask();
    414 
    415   bool MadeChange = false;
    416 
    417   // Undefined shuffle mask -> undefined value.
    418   if (isa<UndefValue>(SVI.getOperand(2)))
    419     return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
    420 
    421   unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
    422 
    423   APInt UndefElts(VWidth, 0);
    424   APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
    425   if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
    426     if (V != &SVI)
    427       return ReplaceInstUsesWith(SVI, V);
    428     LHS = SVI.getOperand(0);
    429     RHS = SVI.getOperand(1);
    430     MadeChange = true;
    431   }
    432 
    433   unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements();
    434 
    435   // Canonicalize shuffle(x    ,x,mask) -> shuffle(x, undef,mask')
    436   // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
    437   if (LHS == RHS || isa<UndefValue>(LHS)) {
    438     if (isa<UndefValue>(LHS) && LHS == RHS) {
    439       // shuffle(undef,undef,mask) -> undef.
    440       Value* result = (VWidth == LHSWidth)
    441                       ? LHS : UndefValue::get(SVI.getType());
    442       return ReplaceInstUsesWith(SVI, result);
    443     }
    444 
    445     // Remap any references to RHS to use LHS.
    446     SmallVector<Constant*, 16> Elts;
    447     for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
    448       if (Mask[i] < 0) {
    449         Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
    450         continue;
    451       }
    452 
    453       if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
    454           (Mask[i] <  (int)e && isa<UndefValue>(LHS))) {
    455         Mask[i] = -1;     // Turn into undef.
    456         Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
    457       } else {
    458         Mask[i] = Mask[i] % e;  // Force to LHS.
    459         Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
    460                                         Mask[i]));
    461       }
    462     }
    463     SVI.setOperand(0, SVI.getOperand(1));
    464     SVI.setOperand(1, UndefValue::get(RHS->getType()));
    465     SVI.setOperand(2, ConstantVector::get(Elts));
    466     LHS = SVI.getOperand(0);
    467     RHS = SVI.getOperand(1);
    468     MadeChange = true;
    469   }
    470 
    471   if (VWidth == LHSWidth) {
    472     // Analyze the shuffle, are the LHS or RHS and identity shuffles?
    473     bool isLHSID = true, isRHSID = true;
    474 
    475     for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
    476       if (Mask[i] < 0) continue;  // Ignore undef values.
    477       // Is this an identity shuffle of the LHS value?
    478       isLHSID &= (Mask[i] == (int)i);
    479 
    480       // Is this an identity shuffle of the RHS value?
    481       isRHSID &= (Mask[i]-e == i);
    482     }
    483 
    484     // Eliminate identity shuffles.
    485     if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
    486     if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
    487   }
    488 
    489   // If the LHS is a shufflevector itself, see if we can combine it with this
    490   // one without producing an unusual shuffle.
    491   // Cases that might be simplified:
    492   // 1.
    493   // x1=shuffle(v1,v2,mask1)
    494   //  x=shuffle(x1,undef,mask)
    495   //        ==>
    496   //  x=shuffle(v1,undef,newMask)
    497   // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
    498   // 2.
    499   // x1=shuffle(v1,undef,mask1)
    500   //  x=shuffle(x1,x2,mask)
    501   // where v1.size() == mask1.size()
    502   //        ==>
    503   //  x=shuffle(v1,x2,newMask)
    504   // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
    505   // 3.
    506   // x2=shuffle(v2,undef,mask2)
    507   //  x=shuffle(x1,x2,mask)
    508   // where v2.size() == mask2.size()
    509   //        ==>
    510   //  x=shuffle(x1,v2,newMask)
    511   // newMask[i] = (mask[i] < x1.size())
    512   //              ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
    513   // 4.
    514   // x1=shuffle(v1,undef,mask1)
    515   // x2=shuffle(v2,undef,mask2)
    516   //  x=shuffle(x1,x2,mask)
    517   // where v1.size() == v2.size()
    518   //        ==>
    519   //  x=shuffle(v1,v2,newMask)
    520   // newMask[i] = (mask[i] < x1.size())
    521   //              ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
    522   //
    523   // Here we are really conservative:
    524   // we are absolutely afraid of producing a shuffle mask not in the input
    525   // program, because the code gen may not be smart enough to turn a merged
    526   // shuffle into two specific shuffles: it may produce worse code.  As such,
    527   // we only merge two shuffles if the result is either a splat or one of the
    528   // input shuffle masks.  In this case, merging the shuffles just removes
    529   // one instruction, which we know is safe.  This is good for things like
    530   // turning: (splat(splat)) -> splat, or
    531   // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
    532   ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
    533   ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
    534   if (LHSShuffle)
    535     if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
    536       LHSShuffle = NULL;
    537   if (RHSShuffle)
    538     if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
    539       RHSShuffle = NULL;
    540   if (!LHSShuffle && !RHSShuffle)
    541     return MadeChange ? &SVI : 0;
    542 
    543   Value* LHSOp0 = NULL;
    544   Value* LHSOp1 = NULL;
    545   Value* RHSOp0 = NULL;
    546   unsigned LHSOp0Width = 0;
    547   unsigned RHSOp0Width = 0;
    548   if (LHSShuffle) {
    549     LHSOp0 = LHSShuffle->getOperand(0);
    550     LHSOp1 = LHSShuffle->getOperand(1);
    551     LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements();
    552   }
    553   if (RHSShuffle) {
    554     RHSOp0 = RHSShuffle->getOperand(0);
    555     RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements();
    556   }
    557   Value* newLHS = LHS;
    558   Value* newRHS = RHS;
    559   if (LHSShuffle) {
    560     // case 1
    561     if (isa<UndefValue>(RHS)) {
    562       newLHS = LHSOp0;
    563       newRHS = LHSOp1;
    564     }
    565     // case 2 or 4
    566     else if (LHSOp0Width == LHSWidth) {
    567       newLHS = LHSOp0;
    568     }
    569   }
    570   // case 3 or 4
    571   if (RHSShuffle && RHSOp0Width == LHSWidth) {
    572     newRHS = RHSOp0;
    573   }
    574   // case 4
    575   if (LHSOp0 == RHSOp0) {
    576     newLHS = LHSOp0;
    577     newRHS = NULL;
    578   }
    579 
    580   if (newLHS == LHS && newRHS == RHS)
    581     return MadeChange ? &SVI : 0;
    582 
    583   SmallVector<int, 16> LHSMask;
    584   SmallVector<int, 16> RHSMask;
    585   if (newLHS != LHS)
    586     LHSMask = LHSShuffle->getShuffleMask();
    587   if (RHSShuffle && newRHS != RHS)
    588     RHSMask = RHSShuffle->getShuffleMask();
    589 
    590   unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
    591   SmallVector<int, 16> newMask;
    592   bool isSplat = true;
    593   int SplatElt = -1;
    594   // Create a new mask for the new ShuffleVectorInst so that the new
    595   // ShuffleVectorInst is equivalent to the original one.
    596   for (unsigned i = 0; i < VWidth; ++i) {
    597     int eltMask;
    598     if (Mask[i] == -1) {
    599       // This element is an undef value.
    600       eltMask = -1;
    601     } else if (Mask[i] < (int)LHSWidth) {
    602       // This element is from left hand side vector operand.
    603       //
    604       // If LHS is going to be replaced (case 1, 2, or 4), calculate the
    605       // new mask value for the element.
    606       if (newLHS != LHS) {
    607         eltMask = LHSMask[Mask[i]];
    608         // If the value selected is an undef value, explicitly specify it
    609         // with a -1 mask value.
    610         if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
    611           eltMask = -1;
    612       }
    613       else
    614         eltMask = Mask[i];
    615     } else {
    616       // This element is from right hand side vector operand
    617       //
    618       // If the value selected is an undef value, explicitly specify it
    619       // with a -1 mask value. (case 1)
    620       if (isa<UndefValue>(RHS))
    621         eltMask = -1;
    622       // If RHS is going to be replaced (case 3 or 4), calculate the
    623       // new mask value for the element.
    624       else if (newRHS != RHS) {
    625         eltMask = RHSMask[Mask[i]-LHSWidth];
    626         // If the value selected is an undef value, explicitly specify it
    627         // with a -1 mask value.
    628         if (eltMask >= (int)RHSOp0Width) {
    629           assert(isa<UndefValue>(RHSShuffle->getOperand(1))
    630                  && "should have been check above");
    631           eltMask = -1;
    632         }
    633       }
    634       else
    635         eltMask = Mask[i]-LHSWidth;
    636 
    637       // If LHS's width is changed, shift the mask value accordingly.
    638       // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any
    639       // references to RHSOp0 to LHSOp0, so we don't need to shift the mask.
    640       if (eltMask >= 0 && newRHS != NULL)
    641         eltMask += newLHSWidth;
    642     }
    643 
    644     // Check if this could still be a splat.
    645     if (eltMask >= 0) {
    646       if (SplatElt >= 0 && SplatElt != eltMask)
    647         isSplat = false;
    648       SplatElt = eltMask;
    649     }
    650 
    651     newMask.push_back(eltMask);
    652   }
    653 
    654   // If the result mask is equal to one of the original shuffle masks,
    655   // or is a splat, do the replacement.
    656   if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
    657     SmallVector<Constant*, 16> Elts;
    658     Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
    659     for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
    660       if (newMask[i] < 0) {
    661         Elts.push_back(UndefValue::get(Int32Ty));
    662       } else {
    663         Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
    664       }
    665     }
    666     if (newRHS == NULL)
    667       newRHS = UndefValue::get(newLHS->getType());
    668     return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
    669   }
    670 
    671   return MadeChange ? &SVI : 0;
    672 }
    673