Home | History | Annotate | Download | only in InstCombine
      1 //===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains logic for simplifying instructions based on information
     11 // about how they are used.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 
     16 #include "InstCombine.h"
     17 #include "llvm/Target/TargetData.h"
     18 #include "llvm/IntrinsicInst.h"
     19 
     20 using namespace llvm;
     21 
     22 
     23 /// ShrinkDemandedConstant - Check to see if the specified operand of the
     24 /// specified instruction is a constant integer.  If so, check to see if there
     25 /// are any bits set in the constant that are not demanded.  If so, shrink the
     26 /// constant and return true.
     27 static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
     28                                    APInt Demanded) {
     29   assert(I && "No instruction?");
     30   assert(OpNo < I->getNumOperands() && "Operand index too large");
     31 
     32   // If the operand is not a constant integer, nothing to do.
     33   ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo));
     34   if (!OpC) return false;
     35 
     36   // If there are no bits set that aren't demanded, nothing to do.
     37   Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
     38   if ((~Demanded & OpC->getValue()) == 0)
     39     return false;
     40 
     41   // This instruction is producing bits that are not demanded. Shrink the RHS.
     42   Demanded &= OpC->getValue();
     43   I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
     44   return true;
     45 }
     46 
     47 
     48 
     49 /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
     50 /// SimplifyDemandedBits knows about.  See if the instruction has any
     51 /// properties that allow us to simplify its operands.
     52 bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
     53   unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
     54   APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
     55   APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
     56 
     57   Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
     58                                      KnownZero, KnownOne, 0);
     59   if (V == 0) return false;
     60   if (V == &Inst) return true;
     61   ReplaceInstUsesWith(Inst, V);
     62   return true;
     63 }
     64 
     65 /// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
     66 /// specified instruction operand if possible, updating it in place.  It returns
     67 /// true if it made any change and false otherwise.
     68 bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
     69                                         APInt &KnownZero, APInt &KnownOne,
     70                                         unsigned Depth) {
     71   Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
     72                                           KnownZero, KnownOne, Depth);
     73   if (NewVal == 0) return false;
     74   U = NewVal;
     75   return true;
     76 }
     77 
     78 
     79 /// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
     80 /// value based on the demanded bits.  When this function is called, it is known
     81 /// that only the bits set in DemandedMask of the result of V are ever used
     82 /// downstream. Consequently, depending on the mask and V, it may be possible
     83 /// to replace V with a constant or one of its operands. In such cases, this
     84 /// function does the replacement and returns true. In all other cases, it
     85 /// returns false after analyzing the expression and setting KnownOne and known
     86 /// to be one in the expression.  KnownZero contains all the bits that are known
     87 /// to be zero in the expression. These are provided to potentially allow the
     88 /// caller (which might recursively be SimplifyDemandedBits itself) to simplify
     89 /// the expression. KnownOne and KnownZero always follow the invariant that
     90 /// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
     91 /// the bits in KnownOne and KnownZero may only be accurate for those bits set
     92 /// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
     93 /// and KnownOne must all be the same.
     94 ///
     95 /// This returns null if it did not change anything and it permits no
     96 /// simplification.  This returns V itself if it did some simplification of V's
     97 /// operands based on the information about what bits are demanded. This returns
     98 /// some other non-null value if it found out that V is equal to another value
     99 /// in the context where the specified bits are demanded, but not for all users.
    100 Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
    101                                              APInt &KnownZero, APInt &KnownOne,
    102                                              unsigned Depth) {
    103   assert(V != 0 && "Null pointer of Value???");
    104   assert(Depth <= 6 && "Limit Search Depth");
    105   uint32_t BitWidth = DemandedMask.getBitWidth();
    106   Type *VTy = V->getType();
    107   assert((TD || !VTy->isPointerTy()) &&
    108          "SimplifyDemandedBits needs to know bit widths!");
    109   assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
    110          (!VTy->isIntOrIntVectorTy() ||
    111           VTy->getScalarSizeInBits() == BitWidth) &&
    112          KnownZero.getBitWidth() == BitWidth &&
    113          KnownOne.getBitWidth() == BitWidth &&
    114          "Value *V, DemandedMask, KnownZero and KnownOne "
    115          "must have same BitWidth");
    116   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
    117     // We know all of the bits for a constant!
    118     KnownOne = CI->getValue() & DemandedMask;
    119     KnownZero = ~KnownOne & DemandedMask;
    120     return 0;
    121   }
    122   if (isa<ConstantPointerNull>(V)) {
    123     // We know all of the bits for a constant!
    124     KnownOne.clearAllBits();
    125     KnownZero = DemandedMask;
    126     return 0;
    127   }
    128 
    129   KnownZero.clearAllBits();
    130   KnownOne.clearAllBits();
    131   if (DemandedMask == 0) {   // Not demanding any bits from V.
    132     if (isa<UndefValue>(V))
    133       return 0;
    134     return UndefValue::get(VTy);
    135   }
    136 
    137   if (Depth == 6)        // Limit search depth.
    138     return 0;
    139 
    140   APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
    141   APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
    142 
    143   Instruction *I = dyn_cast<Instruction>(V);
    144   if (!I) {
    145     ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
    146     return 0;        // Only analyze instructions.
    147   }
    148 
    149   // If there are multiple uses of this value and we aren't at the root, then
    150   // we can't do any simplifications of the operands, because DemandedMask
    151   // only reflects the bits demanded by *one* of the users.
    152   if (Depth != 0 && !I->hasOneUse()) {
    153     // Despite the fact that we can't simplify this instruction in all User's
    154     // context, we can at least compute the knownzero/knownone bits, and we can
    155     // do simplifications that apply to *just* the one user if we know that
    156     // this instruction has a simpler value in that context.
    157     if (I->getOpcode() == Instruction::And) {
    158       // If either the LHS or the RHS are Zero, the result is zero.
    159       ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
    160       ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
    161 
    162       // If all of the demanded bits are known 1 on one side, return the other.
    163       // These bits cannot contribute to the result of the 'and' in this
    164       // context.
    165       if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
    166           (DemandedMask & ~LHSKnownZero))
    167         return I->getOperand(0);
    168       if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
    169           (DemandedMask & ~RHSKnownZero))
    170         return I->getOperand(1);
    171 
    172       // If all of the demanded bits in the inputs are known zeros, return zero.
    173       if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
    174         return Constant::getNullValue(VTy);
    175 
    176     } else if (I->getOpcode() == Instruction::Or) {
    177       // We can simplify (X|Y) -> X or Y in the user's context if we know that
    178       // only bits from X or Y are demanded.
    179 
    180       // If either the LHS or the RHS are One, the result is One.
    181       ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
    182       ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
    183 
    184       // If all of the demanded bits are known zero on one side, return the
    185       // other.  These bits cannot contribute to the result of the 'or' in this
    186       // context.
    187       if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
    188           (DemandedMask & ~LHSKnownOne))
    189         return I->getOperand(0);
    190       if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
    191           (DemandedMask & ~RHSKnownOne))
    192         return I->getOperand(1);
    193 
    194       // If all of the potentially set bits on one side are known to be set on
    195       // the other side, just use the 'other' side.
    196       if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
    197           (DemandedMask & (~RHSKnownZero)))
    198         return I->getOperand(0);
    199       if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
    200           (DemandedMask & (~LHSKnownZero)))
    201         return I->getOperand(1);
    202     }
    203 
    204     // Compute the KnownZero/KnownOne bits to simplify things downstream.
    205     ComputeMaskedBits(I, KnownZero, KnownOne, Depth);
    206     return 0;
    207   }
    208 
    209   // If this is the root being simplified, allow it to have multiple uses,
    210   // just set the DemandedMask to all bits so that we can try to simplify the
    211   // operands.  This allows visitTruncInst (for example) to simplify the
    212   // operand of a trunc without duplicating all the logic below.
    213   if (Depth == 0 && !V->hasOneUse())
    214     DemandedMask = APInt::getAllOnesValue(BitWidth);
    215 
    216   switch (I->getOpcode()) {
    217   default:
    218     ComputeMaskedBits(I, KnownZero, KnownOne, Depth);
    219     break;
    220   case Instruction::And:
    221     // If either the LHS or the RHS are Zero, the result is zero.
    222     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
    223                              RHSKnownZero, RHSKnownOne, Depth+1) ||
    224         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
    225                              LHSKnownZero, LHSKnownOne, Depth+1))
    226       return I;
    227     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
    228     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
    229 
    230     // If all of the demanded bits are known 1 on one side, return the other.
    231     // These bits cannot contribute to the result of the 'and'.
    232     if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
    233         (DemandedMask & ~LHSKnownZero))
    234       return I->getOperand(0);
    235     if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
    236         (DemandedMask & ~RHSKnownZero))
    237       return I->getOperand(1);
    238 
    239     // If all of the demanded bits in the inputs are known zeros, return zero.
    240     if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
    241       return Constant::getNullValue(VTy);
    242 
    243     // If the RHS is a constant, see if we can simplify it.
    244     if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
    245       return I;
    246 
    247     // Output known-1 bits are only known if set in both the LHS & RHS.
    248     KnownOne = RHSKnownOne & LHSKnownOne;
    249     // Output known-0 are known to be clear if zero in either the LHS | RHS.
    250     KnownZero = RHSKnownZero | LHSKnownZero;
    251     break;
    252   case Instruction::Or:
    253     // If either the LHS or the RHS are One, the result is One.
    254     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
    255                              RHSKnownZero, RHSKnownOne, Depth+1) ||
    256         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
    257                              LHSKnownZero, LHSKnownOne, Depth+1))
    258       return I;
    259     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
    260     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
    261 
    262     // If all of the demanded bits are known zero on one side, return the other.
    263     // These bits cannot contribute to the result of the 'or'.
    264     if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
    265         (DemandedMask & ~LHSKnownOne))
    266       return I->getOperand(0);
    267     if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
    268         (DemandedMask & ~RHSKnownOne))
    269       return I->getOperand(1);
    270 
    271     // If all of the potentially set bits on one side are known to be set on
    272     // the other side, just use the 'other' side.
    273     if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
    274         (DemandedMask & (~RHSKnownZero)))
    275       return I->getOperand(0);
    276     if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
    277         (DemandedMask & (~LHSKnownZero)))
    278       return I->getOperand(1);
    279 
    280     // If the RHS is a constant, see if we can simplify it.
    281     if (ShrinkDemandedConstant(I, 1, DemandedMask))
    282       return I;
    283 
    284     // Output known-0 bits are only known if clear in both the LHS & RHS.
    285     KnownZero = RHSKnownZero & LHSKnownZero;
    286     // Output known-1 are known to be set if set in either the LHS | RHS.
    287     KnownOne = RHSKnownOne | LHSKnownOne;
    288     break;
    289   case Instruction::Xor: {
    290     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
    291                              RHSKnownZero, RHSKnownOne, Depth+1) ||
    292         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
    293                              LHSKnownZero, LHSKnownOne, Depth+1))
    294       return I;
    295     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
    296     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
    297 
    298     // If all of the demanded bits are known zero on one side, return the other.
    299     // These bits cannot contribute to the result of the 'xor'.
    300     if ((DemandedMask & RHSKnownZero) == DemandedMask)
    301       return I->getOperand(0);
    302     if ((DemandedMask & LHSKnownZero) == DemandedMask)
    303       return I->getOperand(1);
    304 
    305     // If all of the demanded bits are known to be zero on one side or the
    306     // other, turn this into an *inclusive* or.
    307     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    308     if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
    309       Instruction *Or =
    310         BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
    311                                  I->getName());
    312       return InsertNewInstWith(Or, *I);
    313     }
    314 
    315     // If all of the demanded bits on one side are known, and all of the set
    316     // bits on that side are also known to be set on the other side, turn this
    317     // into an AND, as we know the bits will be cleared.
    318     //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
    319     if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
    320       // all known
    321       if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
    322         Constant *AndC = Constant::getIntegerValue(VTy,
    323                                                    ~RHSKnownOne & DemandedMask);
    324         Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
    325         return InsertNewInstWith(And, *I);
    326       }
    327     }
    328 
    329     // If the RHS is a constant, see if we can simplify it.
    330     // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
    331     if (ShrinkDemandedConstant(I, 1, DemandedMask))
    332       return I;
    333 
    334     // If our LHS is an 'and' and if it has one use, and if any of the bits we
    335     // are flipping are known to be set, then the xor is just resetting those
    336     // bits to zero.  We can just knock out bits from the 'and' and the 'xor',
    337     // simplifying both of them.
    338     if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
    339       if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
    340           isa<ConstantInt>(I->getOperand(1)) &&
    341           isa<ConstantInt>(LHSInst->getOperand(1)) &&
    342           (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
    343         ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
    344         ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
    345         APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
    346 
    347         Constant *AndC =
    348           ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
    349         Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
    350         InsertNewInstWith(NewAnd, *I);
    351 
    352         Constant *XorC =
    353           ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
    354         Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
    355         return InsertNewInstWith(NewXor, *I);
    356       }
    357 
    358     // Output known-0 bits are known if clear or set in both the LHS & RHS.
    359     KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne);
    360     // Output known-1 are known to be set if set in only one of the LHS, RHS.
    361     KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero);
    362     break;
    363   }
    364   case Instruction::Select:
    365     if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
    366                              RHSKnownZero, RHSKnownOne, Depth+1) ||
    367         SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
    368                              LHSKnownZero, LHSKnownOne, Depth+1))
    369       return I;
    370     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
    371     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
    372 
    373     // If the operands are constants, see if we can simplify them.
    374     if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
    375         ShrinkDemandedConstant(I, 2, DemandedMask))
    376       return I;
    377 
    378     // Only known if known in both the LHS and RHS.
    379     KnownOne = RHSKnownOne & LHSKnownOne;
    380     KnownZero = RHSKnownZero & LHSKnownZero;
    381     break;
    382   case Instruction::Trunc: {
    383     unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
    384     DemandedMask = DemandedMask.zext(truncBf);
    385     KnownZero = KnownZero.zext(truncBf);
    386     KnownOne = KnownOne.zext(truncBf);
    387     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
    388                              KnownZero, KnownOne, Depth+1))
    389       return I;
    390     DemandedMask = DemandedMask.trunc(BitWidth);
    391     KnownZero = KnownZero.trunc(BitWidth);
    392     KnownOne = KnownOne.trunc(BitWidth);
    393     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    394     break;
    395   }
    396   case Instruction::BitCast:
    397     if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
    398       return 0;  // vector->int or fp->int?
    399 
    400     if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
    401       if (VectorType *SrcVTy =
    402             dyn_cast<VectorType>(I->getOperand(0)->getType())) {
    403         if (DstVTy->getNumElements() != SrcVTy->getNumElements())
    404           // Don't touch a bitcast between vectors of different element counts.
    405           return 0;
    406       } else
    407         // Don't touch a scalar-to-vector bitcast.
    408         return 0;
    409     } else if (I->getOperand(0)->getType()->isVectorTy())
    410       // Don't touch a vector-to-scalar bitcast.
    411       return 0;
    412 
    413     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
    414                              KnownZero, KnownOne, Depth+1))
    415       return I;
    416     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    417     break;
    418   case Instruction::ZExt: {
    419     // Compute the bits in the result that are not present in the input.
    420     unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
    421 
    422     DemandedMask = DemandedMask.trunc(SrcBitWidth);
    423     KnownZero = KnownZero.trunc(SrcBitWidth);
    424     KnownOne = KnownOne.trunc(SrcBitWidth);
    425     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
    426                              KnownZero, KnownOne, Depth+1))
    427       return I;
    428     DemandedMask = DemandedMask.zext(BitWidth);
    429     KnownZero = KnownZero.zext(BitWidth);
    430     KnownOne = KnownOne.zext(BitWidth);
    431     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    432     // The top bits are known to be zero.
    433     KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
    434     break;
    435   }
    436   case Instruction::SExt: {
    437     // Compute the bits in the result that are not present in the input.
    438     unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
    439 
    440     APInt InputDemandedBits = DemandedMask &
    441                               APInt::getLowBitsSet(BitWidth, SrcBitWidth);
    442 
    443     APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
    444     // If any of the sign extended bits are demanded, we know that the sign
    445     // bit is demanded.
    446     if ((NewBits & DemandedMask) != 0)
    447       InputDemandedBits.setBit(SrcBitWidth-1);
    448 
    449     InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
    450     KnownZero = KnownZero.trunc(SrcBitWidth);
    451     KnownOne = KnownOne.trunc(SrcBitWidth);
    452     if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
    453                              KnownZero, KnownOne, Depth+1))
    454       return I;
    455     InputDemandedBits = InputDemandedBits.zext(BitWidth);
    456     KnownZero = KnownZero.zext(BitWidth);
    457     KnownOne = KnownOne.zext(BitWidth);
    458     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    459 
    460     // If the sign bit of the input is known set or clear, then we know the
    461     // top bits of the result.
    462 
    463     // If the input sign bit is known zero, or if the NewBits are not demanded
    464     // convert this into a zero extension.
    465     if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
    466       // Convert to ZExt cast
    467       CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
    468       return InsertNewInstWith(NewCast, *I);
    469     } else if (KnownOne[SrcBitWidth-1]) {    // Input sign bit known set
    470       KnownOne |= NewBits;
    471     }
    472     break;
    473   }
    474   case Instruction::Add: {
    475     // Figure out what the input bits are.  If the top bits of the and result
    476     // are not demanded, then the add doesn't demand them from its input
    477     // either.
    478     unsigned NLZ = DemandedMask.countLeadingZeros();
    479 
    480     // If there is a constant on the RHS, there are a variety of xformations
    481     // we can do.
    482     if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
    483       // If null, this should be simplified elsewhere.  Some of the xforms here
    484       // won't work if the RHS is zero.
    485       if (RHS->isZero())
    486         break;
    487 
    488       // If the top bit of the output is demanded, demand everything from the
    489       // input.  Otherwise, we demand all the input bits except NLZ top bits.
    490       APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
    491 
    492       // Find information about known zero/one bits in the input.
    493       if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
    494                                LHSKnownZero, LHSKnownOne, Depth+1))
    495         return I;
    496 
    497       // If the RHS of the add has bits set that can't affect the input, reduce
    498       // the constant.
    499       if (ShrinkDemandedConstant(I, 1, InDemandedBits))
    500         return I;
    501 
    502       // Avoid excess work.
    503       if (LHSKnownZero == 0 && LHSKnownOne == 0)
    504         break;
    505 
    506       // Turn it into OR if input bits are zero.
    507       if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
    508         Instruction *Or =
    509           BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
    510                                    I->getName());
    511         return InsertNewInstWith(Or, *I);
    512       }
    513 
    514       // We can say something about the output known-zero and known-one bits,
    515       // depending on potential carries from the input constant and the
    516       // unknowns.  For example if the LHS is known to have at most the 0x0F0F0
    517       // bits set and the RHS constant is 0x01001, then we know we have a known
    518       // one mask of 0x00001 and a known zero mask of 0xE0F0E.
    519 
    520       // To compute this, we first compute the potential carry bits.  These are
    521       // the bits which may be modified.  I'm not aware of a better way to do
    522       // this scan.
    523       const APInt &RHSVal = RHS->getValue();
    524       APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
    525 
    526       // Now that we know which bits have carries, compute the known-1/0 sets.
    527 
    528       // Bits are known one if they are known zero in one operand and one in the
    529       // other, and there is no input carry.
    530       KnownOne = ((LHSKnownZero & RHSVal) |
    531                   (LHSKnownOne & ~RHSVal)) & ~CarryBits;
    532 
    533       // Bits are known zero if they are known zero in both operands and there
    534       // is no input carry.
    535       KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
    536     } else {
    537       // If the high-bits of this ADD are not demanded, then it does not demand
    538       // the high bits of its LHS or RHS.
    539       if (DemandedMask[BitWidth-1] == 0) {
    540         // Right fill the mask of bits for this ADD to demand the most
    541         // significant bit and all those below it.
    542         APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
    543         if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
    544                                  LHSKnownZero, LHSKnownOne, Depth+1) ||
    545             SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
    546                                  LHSKnownZero, LHSKnownOne, Depth+1))
    547           return I;
    548       }
    549     }
    550     break;
    551   }
    552   case Instruction::Sub:
    553     // If the high-bits of this SUB are not demanded, then it does not demand
    554     // the high bits of its LHS or RHS.
    555     if (DemandedMask[BitWidth-1] == 0) {
    556       // Right fill the mask of bits for this SUB to demand the most
    557       // significant bit and all those below it.
    558       uint32_t NLZ = DemandedMask.countLeadingZeros();
    559       APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
    560       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
    561                                LHSKnownZero, LHSKnownOne, Depth+1) ||
    562           SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
    563                                LHSKnownZero, LHSKnownOne, Depth+1))
    564         return I;
    565     }
    566 
    567     // Otherwise just hand the sub off to ComputeMaskedBits to fill in
    568     // the known zeros and ones.
    569     ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
    570 
    571     // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
    572     // zero.
    573     if (ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(0))) {
    574       APInt I0 = C0->getValue();
    575       if ((I0 + 1).isPowerOf2() && (I0 | KnownZero).isAllOnesValue()) {
    576         Instruction *Xor = BinaryOperator::CreateXor(I->getOperand(1), C0);
    577         return InsertNewInstWith(Xor, *I);
    578       }
    579     }
    580     break;
    581   case Instruction::Shl:
    582     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
    583       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
    584       APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
    585 
    586       // If the shift is NUW/NSW, then it does demand the high bits.
    587       ShlOperator *IOp = cast<ShlOperator>(I);
    588       if (IOp->hasNoSignedWrap())
    589         DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
    590       else if (IOp->hasNoUnsignedWrap())
    591         DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
    592 
    593       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
    594                                KnownZero, KnownOne, Depth+1))
    595         return I;
    596       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    597       KnownZero <<= ShiftAmt;
    598       KnownOne  <<= ShiftAmt;
    599       // low bits known zero.
    600       if (ShiftAmt)
    601         KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
    602     }
    603     break;
    604   case Instruction::LShr:
    605     // For a logical shift right
    606     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
    607       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
    608 
    609       // Unsigned shift right.
    610       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
    611 
    612       // If the shift is exact, then it does demand the low bits (and knows that
    613       // they are zero).
    614       if (cast<LShrOperator>(I)->isExact())
    615         DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
    616 
    617       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
    618                                KnownZero, KnownOne, Depth+1))
    619         return I;
    620       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    621       KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
    622       KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
    623       if (ShiftAmt) {
    624         // Compute the new bits that are at the top now.
    625         APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
    626         KnownZero |= HighBits;  // high bits known zero.
    627       }
    628     }
    629     break;
    630   case Instruction::AShr:
    631     // If this is an arithmetic shift right and only the low-bit is set, we can
    632     // always convert this into a logical shr, even if the shift amount is
    633     // variable.  The low bit of the shift cannot be an input sign bit unless
    634     // the shift amount is >= the size of the datatype, which is undefined.
    635     if (DemandedMask == 1) {
    636       // Perform the logical shift right.
    637       Instruction *NewVal = BinaryOperator::CreateLShr(
    638                         I->getOperand(0), I->getOperand(1), I->getName());
    639       return InsertNewInstWith(NewVal, *I);
    640     }
    641 
    642     // If the sign bit is the only bit demanded by this ashr, then there is no
    643     // need to do it, the shift doesn't change the high bit.
    644     if (DemandedMask.isSignBit())
    645       return I->getOperand(0);
    646 
    647     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
    648       uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
    649 
    650       // Signed shift right.
    651       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
    652       // If any of the "high bits" are demanded, we should set the sign bit as
    653       // demanded.
    654       if (DemandedMask.countLeadingZeros() <= ShiftAmt)
    655         DemandedMaskIn.setBit(BitWidth-1);
    656 
    657       // If the shift is exact, then it does demand the low bits (and knows that
    658       // they are zero).
    659       if (cast<AShrOperator>(I)->isExact())
    660         DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
    661 
    662       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
    663                                KnownZero, KnownOne, Depth+1))
    664         return I;
    665       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    666       // Compute the new bits that are at the top now.
    667       APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
    668       KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
    669       KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
    670 
    671       // Handle the sign bits.
    672       APInt SignBit(APInt::getSignBit(BitWidth));
    673       // Adjust to where it is now in the mask.
    674       SignBit = APIntOps::lshr(SignBit, ShiftAmt);
    675 
    676       // If the input sign bit is known to be zero, or if none of the top bits
    677       // are demanded, turn this into an unsigned shift right.
    678       if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
    679           (HighBits & ~DemandedMask) == HighBits) {
    680         // Perform the logical shift right.
    681         BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0),
    682                                                             SA, I->getName());
    683         NewVal->setIsExact(cast<BinaryOperator>(I)->isExact());
    684         return InsertNewInstWith(NewVal, *I);
    685       } else if ((KnownOne & SignBit) != 0) { // New bits are known one.
    686         KnownOne |= HighBits;
    687       }
    688     }
    689     break;
    690   case Instruction::SRem:
    691     if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
    692       // X % -1 demands all the bits because we don't want to introduce
    693       // INT_MIN % -1 (== undef) by accident.
    694       if (Rem->isAllOnesValue())
    695         break;
    696       APInt RA = Rem->getValue().abs();
    697       if (RA.isPowerOf2()) {
    698         if (DemandedMask.ult(RA))    // srem won't affect demanded bits
    699           return I->getOperand(0);
    700 
    701         APInt LowBits = RA - 1;
    702         APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
    703         if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
    704                                  LHSKnownZero, LHSKnownOne, Depth+1))
    705           return I;
    706 
    707         // The low bits of LHS are unchanged by the srem.
    708         KnownZero = LHSKnownZero & LowBits;
    709         KnownOne = LHSKnownOne & LowBits;
    710 
    711         // If LHS is non-negative or has all low bits zero, then the upper bits
    712         // are all zero.
    713         if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
    714           KnownZero |= ~LowBits;
    715 
    716         // If LHS is negative and not all low bits are zero, then the upper bits
    717         // are all one.
    718         if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0))
    719           KnownOne |= ~LowBits;
    720 
    721         assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    722       }
    723     }
    724 
    725     // The sign bit is the LHS's sign bit, except when the result of the
    726     // remainder is zero.
    727     if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
    728       APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
    729       ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
    730       // If it's known zero, our sign bit is also zero.
    731       if (LHSKnownZero.isNegative())
    732         KnownZero |= LHSKnownZero;
    733     }
    734     break;
    735   case Instruction::URem: {
    736     APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
    737     APInt AllOnes = APInt::getAllOnesValue(BitWidth);
    738     if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
    739                              KnownZero2, KnownOne2, Depth+1) ||
    740         SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
    741                              KnownZero2, KnownOne2, Depth+1))
    742       return I;
    743 
    744     unsigned Leaders = KnownZero2.countLeadingOnes();
    745     Leaders = std::max(Leaders,
    746                        KnownZero2.countLeadingOnes());
    747     KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
    748     break;
    749   }
    750   case Instruction::Call:
    751     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    752       switch (II->getIntrinsicID()) {
    753       default: break;
    754       case Intrinsic::bswap: {
    755         // If the only bits demanded come from one byte of the bswap result,
    756         // just shift the input byte into position to eliminate the bswap.
    757         unsigned NLZ = DemandedMask.countLeadingZeros();
    758         unsigned NTZ = DemandedMask.countTrailingZeros();
    759 
    760         // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
    761         // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
    762         // have 14 leading zeros, round to 8.
    763         NLZ &= ~7;
    764         NTZ &= ~7;
    765         // If we need exactly one byte, we can do this transformation.
    766         if (BitWidth-NLZ-NTZ == 8) {
    767           unsigned ResultBit = NTZ;
    768           unsigned InputBit = BitWidth-NTZ-8;
    769 
    770           // Replace this with either a left or right shift to get the byte into
    771           // the right place.
    772           Instruction *NewVal;
    773           if (InputBit > ResultBit)
    774             NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
    775                     ConstantInt::get(I->getType(), InputBit-ResultBit));
    776           else
    777             NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
    778                     ConstantInt::get(I->getType(), ResultBit-InputBit));
    779           NewVal->takeName(I);
    780           return InsertNewInstWith(NewVal, *I);
    781         }
    782 
    783         // TODO: Could compute known zero/one bits based on the input.
    784         break;
    785       }
    786       case Intrinsic::x86_sse42_crc32_64_8:
    787       case Intrinsic::x86_sse42_crc32_64_64:
    788         KnownZero = APInt::getHighBitsSet(64, 32);
    789         return 0;
    790       }
    791     }
    792     ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
    793     break;
    794   }
    795 
    796   // If the client is only demanding bits that we know, return the known
    797   // constant.
    798   if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
    799     return Constant::getIntegerValue(VTy, KnownOne);
    800   return 0;
    801 }
    802 
    803 
    804 /// SimplifyDemandedVectorElts - The specified value produces a vector with
    805 /// any number of elements. DemandedElts contains the set of elements that are
    806 /// actually used by the caller.  This method analyzes which elements of the
    807 /// operand are undef and returns that information in UndefElts.
    808 ///
    809 /// If the information about demanded elements can be used to simplify the
    810 /// operation, the operation is simplified, then the resultant value is
    811 /// returned.  This returns null if no change was made.
    812 Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
    813                                                 APInt &UndefElts,
    814                                                 unsigned Depth) {
    815   unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
    816   APInt EltMask(APInt::getAllOnesValue(VWidth));
    817   assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
    818 
    819   if (isa<UndefValue>(V)) {
    820     // If the entire vector is undefined, just return this info.
    821     UndefElts = EltMask;
    822     return 0;
    823   }
    824 
    825   if (DemandedElts == 0) { // If nothing is demanded, provide undef.
    826     UndefElts = EltMask;
    827     return UndefValue::get(V->getType());
    828   }
    829 
    830   UndefElts = 0;
    831 
    832   // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
    833   if (Constant *C = dyn_cast<Constant>(V)) {
    834     // Check if this is identity. If so, return 0 since we are not simplifying
    835     // anything.
    836     if (DemandedElts.isAllOnesValue())
    837       return 0;
    838 
    839     Type *EltTy = cast<VectorType>(V->getType())->getElementType();
    840     Constant *Undef = UndefValue::get(EltTy);
    841 
    842     SmallVector<Constant*, 16> Elts;
    843     for (unsigned i = 0; i != VWidth; ++i) {
    844       if (!DemandedElts[i]) {   // If not demanded, set to undef.
    845         Elts.push_back(Undef);
    846         UndefElts.setBit(i);
    847         continue;
    848       }
    849 
    850       Constant *Elt = C->getAggregateElement(i);
    851       if (Elt == 0) return 0;
    852 
    853       if (isa<UndefValue>(Elt)) {   // Already undef.
    854         Elts.push_back(Undef);
    855         UndefElts.setBit(i);
    856       } else {                               // Otherwise, defined.
    857         Elts.push_back(Elt);
    858       }
    859     }
    860 
    861     // If we changed the constant, return it.
    862     Constant *NewCV = ConstantVector::get(Elts);
    863     return NewCV != C ? NewCV : 0;
    864   }
    865 
    866   // Limit search depth.
    867   if (Depth == 10)
    868     return 0;
    869 
    870   // If multiple users are using the root value, proceed with
    871   // simplification conservatively assuming that all elements
    872   // are needed.
    873   if (!V->hasOneUse()) {
    874     // Quit if we find multiple users of a non-root value though.
    875     // They'll be handled when it's their turn to be visited by
    876     // the main instcombine process.
    877     if (Depth != 0)
    878       // TODO: Just compute the UndefElts information recursively.
    879       return 0;
    880 
    881     // Conservatively assume that all elements are needed.
    882     DemandedElts = EltMask;
    883   }
    884 
    885   Instruction *I = dyn_cast<Instruction>(V);
    886   if (!I) return 0;        // Only analyze instructions.
    887 
    888   bool MadeChange = false;
    889   APInt UndefElts2(VWidth, 0);
    890   Value *TmpV;
    891   switch (I->getOpcode()) {
    892   default: break;
    893 
    894   case Instruction::InsertElement: {
    895     // If this is a variable index, we don't know which element it overwrites.
    896     // demand exactly the same input as we produce.
    897     ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
    898     if (Idx == 0) {
    899       // Note that we can't propagate undef elt info, because we don't know
    900       // which elt is getting updated.
    901       TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
    902                                         UndefElts2, Depth+1);
    903       if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
    904       break;
    905     }
    906 
    907     // If this is inserting an element that isn't demanded, remove this
    908     // insertelement.
    909     unsigned IdxNo = Idx->getZExtValue();
    910     if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
    911       Worklist.Add(I);
    912       return I->getOperand(0);
    913     }
    914 
    915     // Otherwise, the element inserted overwrites whatever was there, so the
    916     // input demanded set is simpler than the output set.
    917     APInt DemandedElts2 = DemandedElts;
    918     DemandedElts2.clearBit(IdxNo);
    919     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
    920                                       UndefElts, Depth+1);
    921     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
    922 
    923     // The inserted element is defined.
    924     UndefElts.clearBit(IdxNo);
    925     break;
    926   }
    927   case Instruction::ShuffleVector: {
    928     ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
    929     uint64_t LHSVWidth =
    930       cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
    931     APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
    932     for (unsigned i = 0; i < VWidth; i++) {
    933       if (DemandedElts[i]) {
    934         unsigned MaskVal = Shuffle->getMaskValue(i);
    935         if (MaskVal != -1u) {
    936           assert(MaskVal < LHSVWidth * 2 &&
    937                  "shufflevector mask index out of range!");
    938           if (MaskVal < LHSVWidth)
    939             LeftDemanded.setBit(MaskVal);
    940           else
    941             RightDemanded.setBit(MaskVal - LHSVWidth);
    942         }
    943       }
    944     }
    945 
    946     APInt UndefElts4(LHSVWidth, 0);
    947     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
    948                                       UndefElts4, Depth+1);
    949     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
    950 
    951     APInt UndefElts3(LHSVWidth, 0);
    952     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
    953                                       UndefElts3, Depth+1);
    954     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
    955 
    956     bool NewUndefElts = false;
    957     for (unsigned i = 0; i < VWidth; i++) {
    958       unsigned MaskVal = Shuffle->getMaskValue(i);
    959       if (MaskVal == -1u) {
    960         UndefElts.setBit(i);
    961       } else if (!DemandedElts[i]) {
    962         NewUndefElts = true;
    963         UndefElts.setBit(i);
    964       } else if (MaskVal < LHSVWidth) {
    965         if (UndefElts4[MaskVal]) {
    966           NewUndefElts = true;
    967           UndefElts.setBit(i);
    968         }
    969       } else {
    970         if (UndefElts3[MaskVal - LHSVWidth]) {
    971           NewUndefElts = true;
    972           UndefElts.setBit(i);
    973         }
    974       }
    975     }
    976 
    977     if (NewUndefElts) {
    978       // Add additional discovered undefs.
    979       SmallVector<Constant*, 16> Elts;
    980       for (unsigned i = 0; i < VWidth; ++i) {
    981         if (UndefElts[i])
    982           Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
    983         else
    984           Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
    985                                           Shuffle->getMaskValue(i)));
    986       }
    987       I->setOperand(2, ConstantVector::get(Elts));
    988       MadeChange = true;
    989     }
    990     break;
    991   }
    992   case Instruction::BitCast: {
    993     // Vector->vector casts only.
    994     VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
    995     if (!VTy) break;
    996     unsigned InVWidth = VTy->getNumElements();
    997     APInt InputDemandedElts(InVWidth, 0);
    998     unsigned Ratio;
    999 
   1000     if (VWidth == InVWidth) {
   1001       // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
   1002       // elements as are demanded of us.
   1003       Ratio = 1;
   1004       InputDemandedElts = DemandedElts;
   1005     } else if (VWidth > InVWidth) {
   1006       // Untested so far.
   1007       break;
   1008 
   1009       // If there are more elements in the result than there are in the source,
   1010       // then an input element is live if any of the corresponding output
   1011       // elements are live.
   1012       Ratio = VWidth/InVWidth;
   1013       for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
   1014         if (DemandedElts[OutIdx])
   1015           InputDemandedElts.setBit(OutIdx/Ratio);
   1016       }
   1017     } else {
   1018       // Untested so far.
   1019       break;
   1020 
   1021       // If there are more elements in the source than there are in the result,
   1022       // then an input element is live if the corresponding output element is
   1023       // live.
   1024       Ratio = InVWidth/VWidth;
   1025       for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
   1026         if (DemandedElts[InIdx/Ratio])
   1027           InputDemandedElts.setBit(InIdx);
   1028     }
   1029 
   1030     // div/rem demand all inputs, because they don't want divide by zero.
   1031     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
   1032                                       UndefElts2, Depth+1);
   1033     if (TmpV) {
   1034       I->setOperand(0, TmpV);
   1035       MadeChange = true;
   1036     }
   1037 
   1038     UndefElts = UndefElts2;
   1039     if (VWidth > InVWidth) {
   1040       llvm_unreachable("Unimp");
   1041       // If there are more elements in the result than there are in the source,
   1042       // then an output element is undef if the corresponding input element is
   1043       // undef.
   1044       for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
   1045         if (UndefElts2[OutIdx/Ratio])
   1046           UndefElts.setBit(OutIdx);
   1047     } else if (VWidth < InVWidth) {
   1048       llvm_unreachable("Unimp");
   1049       // If there are more elements in the source than there are in the result,
   1050       // then a result element is undef if all of the corresponding input
   1051       // elements are undef.
   1052       UndefElts = ~0ULL >> (64-VWidth);  // Start out all undef.
   1053       for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
   1054         if (!UndefElts2[InIdx])            // Not undef?
   1055           UndefElts.clearBit(InIdx/Ratio);    // Clear undef bit.
   1056     }
   1057     break;
   1058   }
   1059   case Instruction::And:
   1060   case Instruction::Or:
   1061   case Instruction::Xor:
   1062   case Instruction::Add:
   1063   case Instruction::Sub:
   1064   case Instruction::Mul:
   1065     // div/rem demand all inputs, because they don't want divide by zero.
   1066     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
   1067                                       UndefElts, Depth+1);
   1068     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
   1069     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
   1070                                       UndefElts2, Depth+1);
   1071     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
   1072 
   1073     // Output elements are undefined if both are undefined.  Consider things
   1074     // like undef&0.  The result is known zero, not undef.
   1075     UndefElts &= UndefElts2;
   1076     break;
   1077 
   1078   case Instruction::Call: {
   1079     IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
   1080     if (!II) break;
   1081     switch (II->getIntrinsicID()) {
   1082     default: break;
   1083 
   1084     // Binary vector operations that work column-wise.  A dest element is a
   1085     // function of the corresponding input elements from the two inputs.
   1086     case Intrinsic::x86_sse_sub_ss:
   1087     case Intrinsic::x86_sse_mul_ss:
   1088     case Intrinsic::x86_sse_min_ss:
   1089     case Intrinsic::x86_sse_max_ss:
   1090     case Intrinsic::x86_sse2_sub_sd:
   1091     case Intrinsic::x86_sse2_mul_sd:
   1092     case Intrinsic::x86_sse2_min_sd:
   1093     case Intrinsic::x86_sse2_max_sd:
   1094       TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
   1095                                         UndefElts, Depth+1);
   1096       if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
   1097       TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
   1098                                         UndefElts2, Depth+1);
   1099       if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
   1100 
   1101       // If only the low elt is demanded and this is a scalarizable intrinsic,
   1102       // scalarize it now.
   1103       if (DemandedElts == 1) {
   1104         switch (II->getIntrinsicID()) {
   1105         default: break;
   1106         case Intrinsic::x86_sse_sub_ss:
   1107         case Intrinsic::x86_sse_mul_ss:
   1108         case Intrinsic::x86_sse2_sub_sd:
   1109         case Intrinsic::x86_sse2_mul_sd:
   1110           // TODO: Lower MIN/MAX/ABS/etc
   1111           Value *LHS = II->getArgOperand(0);
   1112           Value *RHS = II->getArgOperand(1);
   1113           // Extract the element as scalars.
   1114           LHS = InsertNewInstWith(ExtractElementInst::Create(LHS,
   1115             ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
   1116           RHS = InsertNewInstWith(ExtractElementInst::Create(RHS,
   1117             ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
   1118 
   1119           switch (II->getIntrinsicID()) {
   1120           default: llvm_unreachable("Case stmts out of sync!");
   1121           case Intrinsic::x86_sse_sub_ss:
   1122           case Intrinsic::x86_sse2_sub_sd:
   1123             TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS,
   1124                                                         II->getName()), *II);
   1125             break;
   1126           case Intrinsic::x86_sse_mul_ss:
   1127           case Intrinsic::x86_sse2_mul_sd:
   1128             TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS,
   1129                                                          II->getName()), *II);
   1130             break;
   1131           }
   1132 
   1133           Instruction *New =
   1134             InsertElementInst::Create(
   1135               UndefValue::get(II->getType()), TmpV,
   1136               ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
   1137                                       II->getName());
   1138           InsertNewInstWith(New, *II);
   1139           return New;
   1140         }
   1141       }
   1142 
   1143       // Output elements are undefined if both are undefined.  Consider things
   1144       // like undef&0.  The result is known zero, not undef.
   1145       UndefElts &= UndefElts2;
   1146       break;
   1147     }
   1148     break;
   1149   }
   1150   }
   1151   return MadeChange ? I : 0;
   1152 }
   1153