Home | History | Annotate | Download | only in InstCombine
      1 //===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains logic for simplifying instructions based on information
     11 // about how they are used.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "InstCombine.h"
     16 #include "llvm/IR/DataLayout.h"
     17 #include "llvm/IR/IntrinsicInst.h"
     18 #include "llvm/IR/PatternMatch.h"
     19 
     20 using namespace llvm;
     21 using namespace llvm::PatternMatch;
     22 
     23 #define DEBUG_TYPE "instcombine"
     24 
     25 /// ShrinkDemandedConstant - Check to see if the specified operand of the
     26 /// specified instruction is a constant integer.  If so, check to see if there
     27 /// are any bits set in the constant that are not demanded.  If so, shrink the
     28 /// constant and return true.
     29 static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
     30                                    APInt Demanded) {
     31   assert(I && "No instruction?");
     32   assert(OpNo < I->getNumOperands() && "Operand index too large");
     33 
     34   // If the operand is not a constant integer, nothing to do.
     35   ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo));
     36   if (!OpC) return false;
     37 
     38   // If there are no bits set that aren't demanded, nothing to do.
     39   Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
     40   if ((~Demanded & OpC->getValue()) == 0)
     41     return false;
     42 
     43   // This instruction is producing bits that are not demanded. Shrink the RHS.
     44   Demanded &= OpC->getValue();
     45   I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
     46   return true;
     47 }
     48 
     49 
     50 
     51 /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
     52 /// SimplifyDemandedBits knows about.  See if the instruction has any
     53 /// properties that allow us to simplify its operands.
     54 bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
     55   unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
     56   APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
     57   APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
     58 
     59   Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
     60                                      KnownZero, KnownOne, 0);
     61   if (!V) return false;
     62   if (V == &Inst) return true;
     63   ReplaceInstUsesWith(Inst, V);
     64   return true;
     65 }
     66 
     67 /// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
     68 /// specified instruction operand if possible, updating it in place.  It returns
     69 /// true if it made any change and false otherwise.
     70 bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
     71                                         APInt &KnownZero, APInt &KnownOne,
     72                                         unsigned Depth) {
     73   Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
     74                                           KnownZero, KnownOne, Depth);
     75   if (!NewVal) return false;
     76   U = NewVal;
     77   return true;
     78 }
     79 
     80 
     81 /// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
     82 /// value based on the demanded bits.  When this function is called, it is known
     83 /// that only the bits set in DemandedMask of the result of V are ever used
     84 /// downstream. Consequently, depending on the mask and V, it may be possible
     85 /// to replace V with a constant or one of its operands. In such cases, this
     86 /// function does the replacement and returns true. In all other cases, it
     87 /// returns false after analyzing the expression and setting KnownOne and known
     88 /// to be one in the expression.  KnownZero contains all the bits that are known
     89 /// to be zero in the expression. These are provided to potentially allow the
     90 /// caller (which might recursively be SimplifyDemandedBits itself) to simplify
     91 /// the expression. KnownOne and KnownZero always follow the invariant that
     92 /// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
     93 /// the bits in KnownOne and KnownZero may only be accurate for those bits set
     94 /// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
     95 /// and KnownOne must all be the same.
     96 ///
     97 /// This returns null if it did not change anything and it permits no
     98 /// simplification.  This returns V itself if it did some simplification of V's
     99 /// operands based on the information about what bits are demanded. This returns
    100 /// some other non-null value if it found out that V is equal to another value
    101 /// in the context where the specified bits are demanded, but not for all users.
    102 Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
    103                                              APInt &KnownZero, APInt &KnownOne,
    104                                              unsigned Depth) {
    105   assert(V != nullptr && "Null pointer of Value???");
    106   assert(Depth <= 6 && "Limit Search Depth");
    107   uint32_t BitWidth = DemandedMask.getBitWidth();
    108   Type *VTy = V->getType();
    109   assert((DL || !VTy->isPointerTy()) &&
    110          "SimplifyDemandedBits needs to know bit widths!");
    111   assert((!DL || DL->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
    112          (!VTy->isIntOrIntVectorTy() ||
    113           VTy->getScalarSizeInBits() == BitWidth) &&
    114          KnownZero.getBitWidth() == BitWidth &&
    115          KnownOne.getBitWidth() == BitWidth &&
    116          "Value *V, DemandedMask, KnownZero and KnownOne "
    117          "must have same BitWidth");
    118   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
    119     // We know all of the bits for a constant!
    120     KnownOne = CI->getValue() & DemandedMask;
    121     KnownZero = ~KnownOne & DemandedMask;
    122     return nullptr;
    123   }
    124   if (isa<ConstantPointerNull>(V)) {
    125     // We know all of the bits for a constant!
    126     KnownOne.clearAllBits();
    127     KnownZero = DemandedMask;
    128     return nullptr;
    129   }
    130 
    131   KnownZero.clearAllBits();
    132   KnownOne.clearAllBits();
    133   if (DemandedMask == 0) {   // Not demanding any bits from V.
    134     if (isa<UndefValue>(V))
    135       return nullptr;
    136     return UndefValue::get(VTy);
    137   }
    138 
    139   if (Depth == 6)        // Limit search depth.
    140     return nullptr;
    141 
    142   APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
    143   APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
    144 
    145   Instruction *I = dyn_cast<Instruction>(V);
    146   if (!I) {
    147     computeKnownBits(V, KnownZero, KnownOne, Depth);
    148     return nullptr;        // Only analyze instructions.
    149   }
    150 
    151   // If there are multiple uses of this value and we aren't at the root, then
    152   // we can't do any simplifications of the operands, because DemandedMask
    153   // only reflects the bits demanded by *one* of the users.
    154   if (Depth != 0 && !I->hasOneUse()) {
    155     // Despite the fact that we can't simplify this instruction in all User's
    156     // context, we can at least compute the knownzero/knownone bits, and we can
    157     // do simplifications that apply to *just* the one user if we know that
    158     // this instruction has a simpler value in that context.
    159     if (I->getOpcode() == Instruction::And) {
    160       // If either the LHS or the RHS are Zero, the result is zero.
    161       computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
    162       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
    163 
    164       // If all of the demanded bits are known 1 on one side, return the other.
    165       // These bits cannot contribute to the result of the 'and' in this
    166       // context.
    167       if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
    168           (DemandedMask & ~LHSKnownZero))
    169         return I->getOperand(0);
    170       if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
    171           (DemandedMask & ~RHSKnownZero))
    172         return I->getOperand(1);
    173 
    174       // If all of the demanded bits in the inputs are known zeros, return zero.
    175       if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
    176         return Constant::getNullValue(VTy);
    177 
    178     } else if (I->getOpcode() == Instruction::Or) {
    179       // We can simplify (X|Y) -> X or Y in the user's context if we know that
    180       // only bits from X or Y are demanded.
    181 
    182       // If either the LHS or the RHS are One, the result is One.
    183       computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
    184       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
    185 
    186       // If all of the demanded bits are known zero on one side, return the
    187       // other.  These bits cannot contribute to the result of the 'or' in this
    188       // context.
    189       if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
    190           (DemandedMask & ~LHSKnownOne))
    191         return I->getOperand(0);
    192       if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
    193           (DemandedMask & ~RHSKnownOne))
    194         return I->getOperand(1);
    195 
    196       // If all of the potentially set bits on one side are known to be set on
    197       // the other side, just use the 'other' side.
    198       if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
    199           (DemandedMask & (~RHSKnownZero)))
    200         return I->getOperand(0);
    201       if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
    202           (DemandedMask & (~LHSKnownZero)))
    203         return I->getOperand(1);
    204     } else if (I->getOpcode() == Instruction::Xor) {
    205       // We can simplify (X^Y) -> X or Y in the user's context if we know that
    206       // only bits from X or Y are demanded.
    207 
    208       computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
    209       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
    210 
    211       // If all of the demanded bits are known zero on one side, return the
    212       // other.
    213       if ((DemandedMask & RHSKnownZero) == DemandedMask)
    214         return I->getOperand(0);
    215       if ((DemandedMask & LHSKnownZero) == DemandedMask)
    216         return I->getOperand(1);
    217     }
    218 
    219     // Compute the KnownZero/KnownOne bits to simplify things downstream.
    220     computeKnownBits(I, KnownZero, KnownOne, Depth);
    221     return nullptr;
    222   }
    223 
    224   // If this is the root being simplified, allow it to have multiple uses,
    225   // just set the DemandedMask to all bits so that we can try to simplify the
    226   // operands.  This allows visitTruncInst (for example) to simplify the
    227   // operand of a trunc without duplicating all the logic below.
    228   if (Depth == 0 && !V->hasOneUse())
    229     DemandedMask = APInt::getAllOnesValue(BitWidth);
    230 
    231   switch (I->getOpcode()) {
    232   default:
    233     computeKnownBits(I, KnownZero, KnownOne, Depth);
    234     break;
    235   case Instruction::And:
    236     // If either the LHS or the RHS are Zero, the result is zero.
    237     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
    238                              RHSKnownZero, RHSKnownOne, Depth+1) ||
    239         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
    240                              LHSKnownZero, LHSKnownOne, Depth+1))
    241       return I;
    242     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
    243     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
    244 
    245     // If all of the demanded bits are known 1 on one side, return the other.
    246     // These bits cannot contribute to the result of the 'and'.
    247     if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
    248         (DemandedMask & ~LHSKnownZero))
    249       return I->getOperand(0);
    250     if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
    251         (DemandedMask & ~RHSKnownZero))
    252       return I->getOperand(1);
    253 
    254     // If all of the demanded bits in the inputs are known zeros, return zero.
    255     if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
    256       return Constant::getNullValue(VTy);
    257 
    258     // If the RHS is a constant, see if we can simplify it.
    259     if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
    260       return I;
    261 
    262     // Output known-1 bits are only known if set in both the LHS & RHS.
    263     KnownOne = RHSKnownOne & LHSKnownOne;
    264     // Output known-0 are known to be clear if zero in either the LHS | RHS.
    265     KnownZero = RHSKnownZero | LHSKnownZero;
    266     break;
    267   case Instruction::Or:
    268     // If either the LHS or the RHS are One, the result is One.
    269     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
    270                              RHSKnownZero, RHSKnownOne, Depth+1) ||
    271         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
    272                              LHSKnownZero, LHSKnownOne, Depth+1))
    273       return I;
    274     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
    275     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
    276 
    277     // If all of the demanded bits are known zero on one side, return the other.
    278     // These bits cannot contribute to the result of the 'or'.
    279     if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
    280         (DemandedMask & ~LHSKnownOne))
    281       return I->getOperand(0);
    282     if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
    283         (DemandedMask & ~RHSKnownOne))
    284       return I->getOperand(1);
    285 
    286     // If all of the potentially set bits on one side are known to be set on
    287     // the other side, just use the 'other' side.
    288     if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
    289         (DemandedMask & (~RHSKnownZero)))
    290       return I->getOperand(0);
    291     if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
    292         (DemandedMask & (~LHSKnownZero)))
    293       return I->getOperand(1);
    294 
    295     // If the RHS is a constant, see if we can simplify it.
    296     if (ShrinkDemandedConstant(I, 1, DemandedMask))
    297       return I;
    298 
    299     // Output known-0 bits are only known if clear in both the LHS & RHS.
    300     KnownZero = RHSKnownZero & LHSKnownZero;
    301     // Output known-1 are known to be set if set in either the LHS | RHS.
    302     KnownOne = RHSKnownOne | LHSKnownOne;
    303     break;
    304   case Instruction::Xor: {
    305     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
    306                              RHSKnownZero, RHSKnownOne, Depth+1) ||
    307         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
    308                              LHSKnownZero, LHSKnownOne, Depth+1))
    309       return I;
    310     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
    311     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
    312 
    313     // If all of the demanded bits are known zero on one side, return the other.
    314     // These bits cannot contribute to the result of the 'xor'.
    315     if ((DemandedMask & RHSKnownZero) == DemandedMask)
    316       return I->getOperand(0);
    317     if ((DemandedMask & LHSKnownZero) == DemandedMask)
    318       return I->getOperand(1);
    319 
    320     // If all of the demanded bits are known to be zero on one side or the
    321     // other, turn this into an *inclusive* or.
    322     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    323     if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
    324       Instruction *Or =
    325         BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
    326                                  I->getName());
    327       return InsertNewInstWith(Or, *I);
    328     }
    329 
    330     // If all of the demanded bits on one side are known, and all of the set
    331     // bits on that side are also known to be set on the other side, turn this
    332     // into an AND, as we know the bits will be cleared.
    333     //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
    334     if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
    335       // all known
    336       if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
    337         Constant *AndC = Constant::getIntegerValue(VTy,
    338                                                    ~RHSKnownOne & DemandedMask);
    339         Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
    340         return InsertNewInstWith(And, *I);
    341       }
    342     }
    343 
    344     // If the RHS is a constant, see if we can simplify it.
    345     // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
    346     if (ShrinkDemandedConstant(I, 1, DemandedMask))
    347       return I;
    348 
    349     // If our LHS is an 'and' and if it has one use, and if any of the bits we
    350     // are flipping are known to be set, then the xor is just resetting those
    351     // bits to zero.  We can just knock out bits from the 'and' and the 'xor',
    352     // simplifying both of them.
    353     if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
    354       if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
    355           isa<ConstantInt>(I->getOperand(1)) &&
    356           isa<ConstantInt>(LHSInst->getOperand(1)) &&
    357           (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
    358         ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
    359         ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
    360         APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
    361 
    362         Constant *AndC =
    363           ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
    364         Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
    365         InsertNewInstWith(NewAnd, *I);
    366 
    367         Constant *XorC =
    368           ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
    369         Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
    370         return InsertNewInstWith(NewXor, *I);
    371       }
    372 
    373     // Output known-0 bits are known if clear or set in both the LHS & RHS.
    374     KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne);
    375     // Output known-1 are known to be set if set in only one of the LHS, RHS.
    376     KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero);
    377     break;
    378   }
    379   case Instruction::Select:
    380     if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
    381                              RHSKnownZero, RHSKnownOne, Depth+1) ||
    382         SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
    383                              LHSKnownZero, LHSKnownOne, Depth+1))
    384       return I;
    385     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
    386     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
    387 
    388     // If the operands are constants, see if we can simplify them.
    389     if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
    390         ShrinkDemandedConstant(I, 2, DemandedMask))
    391       return I;
    392 
    393     // Only known if known in both the LHS and RHS.
    394     KnownOne = RHSKnownOne & LHSKnownOne;
    395     KnownZero = RHSKnownZero & LHSKnownZero;
    396     break;
    397   case Instruction::Trunc: {
    398     unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
    399     DemandedMask = DemandedMask.zext(truncBf);
    400     KnownZero = KnownZero.zext(truncBf);
    401     KnownOne = KnownOne.zext(truncBf);
    402     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
    403                              KnownZero, KnownOne, Depth+1))
    404       return I;
    405     DemandedMask = DemandedMask.trunc(BitWidth);
    406     KnownZero = KnownZero.trunc(BitWidth);
    407     KnownOne = KnownOne.trunc(BitWidth);
    408     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    409     break;
    410   }
    411   case Instruction::BitCast:
    412     if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
    413       return nullptr;  // vector->int or fp->int?
    414 
    415     if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
    416       if (VectorType *SrcVTy =
    417             dyn_cast<VectorType>(I->getOperand(0)->getType())) {
    418         if (DstVTy->getNumElements() != SrcVTy->getNumElements())
    419           // Don't touch a bitcast between vectors of different element counts.
    420           return nullptr;
    421       } else
    422         // Don't touch a scalar-to-vector bitcast.
    423         return nullptr;
    424     } else if (I->getOperand(0)->getType()->isVectorTy())
    425       // Don't touch a vector-to-scalar bitcast.
    426       return nullptr;
    427 
    428     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
    429                              KnownZero, KnownOne, Depth+1))
    430       return I;
    431     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    432     break;
    433   case Instruction::ZExt: {
    434     // Compute the bits in the result that are not present in the input.
    435     unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
    436 
    437     DemandedMask = DemandedMask.trunc(SrcBitWidth);
    438     KnownZero = KnownZero.trunc(SrcBitWidth);
    439     KnownOne = KnownOne.trunc(SrcBitWidth);
    440     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
    441                              KnownZero, KnownOne, Depth+1))
    442       return I;
    443     DemandedMask = DemandedMask.zext(BitWidth);
    444     KnownZero = KnownZero.zext(BitWidth);
    445     KnownOne = KnownOne.zext(BitWidth);
    446     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    447     // The top bits are known to be zero.
    448     KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
    449     break;
    450   }
    451   case Instruction::SExt: {
    452     // Compute the bits in the result that are not present in the input.
    453     unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
    454 
    455     APInt InputDemandedBits = DemandedMask &
    456                               APInt::getLowBitsSet(BitWidth, SrcBitWidth);
    457 
    458     APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
    459     // If any of the sign extended bits are demanded, we know that the sign
    460     // bit is demanded.
    461     if ((NewBits & DemandedMask) != 0)
    462       InputDemandedBits.setBit(SrcBitWidth-1);
    463 
    464     InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
    465     KnownZero = KnownZero.trunc(SrcBitWidth);
    466     KnownOne = KnownOne.trunc(SrcBitWidth);
    467     if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
    468                              KnownZero, KnownOne, Depth+1))
    469       return I;
    470     InputDemandedBits = InputDemandedBits.zext(BitWidth);
    471     KnownZero = KnownZero.zext(BitWidth);
    472     KnownOne = KnownOne.zext(BitWidth);
    473     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    474 
    475     // If the sign bit of the input is known set or clear, then we know the
    476     // top bits of the result.
    477 
    478     // If the input sign bit is known zero, or if the NewBits are not demanded
    479     // convert this into a zero extension.
    480     if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
    481       // Convert to ZExt cast
    482       CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
    483       return InsertNewInstWith(NewCast, *I);
    484     } else if (KnownOne[SrcBitWidth-1]) {    // Input sign bit known set
    485       KnownOne |= NewBits;
    486     }
    487     break;
    488   }
    489   case Instruction::Add: {
    490     // Figure out what the input bits are.  If the top bits of the and result
    491     // are not demanded, then the add doesn't demand them from its input
    492     // either.
    493     unsigned NLZ = DemandedMask.countLeadingZeros();
    494 
    495     // If there is a constant on the RHS, there are a variety of xformations
    496     // we can do.
    497     if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
    498       // If null, this should be simplified elsewhere.  Some of the xforms here
    499       // won't work if the RHS is zero.
    500       if (RHS->isZero())
    501         break;
    502 
    503       // If the top bit of the output is demanded, demand everything from the
    504       // input.  Otherwise, we demand all the input bits except NLZ top bits.
    505       APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
    506 
    507       // Find information about known zero/one bits in the input.
    508       if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
    509                                LHSKnownZero, LHSKnownOne, Depth+1))
    510         return I;
    511 
    512       // If the RHS of the add has bits set that can't affect the input, reduce
    513       // the constant.
    514       if (ShrinkDemandedConstant(I, 1, InDemandedBits))
    515         return I;
    516 
    517       // Avoid excess work.
    518       if (LHSKnownZero == 0 && LHSKnownOne == 0)
    519         break;
    520 
    521       // Turn it into OR if input bits are zero.
    522       if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
    523         Instruction *Or =
    524           BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
    525                                    I->getName());
    526         return InsertNewInstWith(Or, *I);
    527       }
    528 
    529       // We can say something about the output known-zero and known-one bits,
    530       // depending on potential carries from the input constant and the
    531       // unknowns.  For example if the LHS is known to have at most the 0x0F0F0
    532       // bits set and the RHS constant is 0x01001, then we know we have a known
    533       // one mask of 0x00001 and a known zero mask of 0xE0F0E.
    534 
    535       // To compute this, we first compute the potential carry bits.  These are
    536       // the bits which may be modified.  I'm not aware of a better way to do
    537       // this scan.
    538       const APInt &RHSVal = RHS->getValue();
    539       APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
    540 
    541       // Now that we know which bits have carries, compute the known-1/0 sets.
    542 
    543       // Bits are known one if they are known zero in one operand and one in the
    544       // other, and there is no input carry.
    545       KnownOne = ((LHSKnownZero & RHSVal) |
    546                   (LHSKnownOne & ~RHSVal)) & ~CarryBits;
    547 
    548       // Bits are known zero if they are known zero in both operands and there
    549       // is no input carry.
    550       KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
    551     } else {
    552       // If the high-bits of this ADD are not demanded, then it does not demand
    553       // the high bits of its LHS or RHS.
    554       if (DemandedMask[BitWidth-1] == 0) {
    555         // Right fill the mask of bits for this ADD to demand the most
    556         // significant bit and all those below it.
    557         APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
    558         if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
    559                                  LHSKnownZero, LHSKnownOne, Depth+1) ||
    560             SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
    561                                  LHSKnownZero, LHSKnownOne, Depth+1))
    562           return I;
    563       }
    564     }
    565     break;
    566   }
    567   case Instruction::Sub:
    568     // If the high-bits of this SUB are not demanded, then it does not demand
    569     // the high bits of its LHS or RHS.
    570     if (DemandedMask[BitWidth-1] == 0) {
    571       // Right fill the mask of bits for this SUB to demand the most
    572       // significant bit and all those below it.
    573       uint32_t NLZ = DemandedMask.countLeadingZeros();
    574       APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
    575       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
    576                                LHSKnownZero, LHSKnownOne, Depth+1) ||
    577           SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
    578                                LHSKnownZero, LHSKnownOne, Depth+1))
    579         return I;
    580     }
    581 
    582     // Otherwise just hand the sub off to computeKnownBits to fill in
    583     // the known zeros and ones.
    584     computeKnownBits(V, KnownZero, KnownOne, Depth);
    585 
    586     // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
    587     // zero.
    588     if (ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(0))) {
    589       APInt I0 = C0->getValue();
    590       if ((I0 + 1).isPowerOf2() && (I0 | KnownZero).isAllOnesValue()) {
    591         Instruction *Xor = BinaryOperator::CreateXor(I->getOperand(1), C0);
    592         return InsertNewInstWith(Xor, *I);
    593       }
    594     }
    595     break;
    596   case Instruction::Shl:
    597     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
    598       {
    599         Value *VarX; ConstantInt *C1;
    600         if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) {
    601           Instruction *Shr = cast<Instruction>(I->getOperand(0));
    602           Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask,
    603                                                 KnownZero, KnownOne);
    604           if (R)
    605             return R;
    606         }
    607       }
    608 
    609       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
    610       APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
    611 
    612       // If the shift is NUW/NSW, then it does demand the high bits.
    613       ShlOperator *IOp = cast<ShlOperator>(I);
    614       if (IOp->hasNoSignedWrap())
    615         DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
    616       else if (IOp->hasNoUnsignedWrap())
    617         DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
    618 
    619       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
    620                                KnownZero, KnownOne, Depth+1))
    621         return I;
    622       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    623       KnownZero <<= ShiftAmt;
    624       KnownOne  <<= ShiftAmt;
    625       // low bits known zero.
    626       if (ShiftAmt)
    627         KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
    628     }
    629     break;
    630   case Instruction::LShr:
    631     // For a logical shift right
    632     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
    633       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
    634 
    635       // Unsigned shift right.
    636       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
    637 
    638       // If the shift is exact, then it does demand the low bits (and knows that
    639       // they are zero).
    640       if (cast<LShrOperator>(I)->isExact())
    641         DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
    642 
    643       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
    644                                KnownZero, KnownOne, Depth+1))
    645         return I;
    646       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    647       KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
    648       KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
    649       if (ShiftAmt) {
    650         // Compute the new bits that are at the top now.
    651         APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
    652         KnownZero |= HighBits;  // high bits known zero.
    653       }
    654     }
    655     break;
    656   case Instruction::AShr:
    657     // If this is an arithmetic shift right and only the low-bit is set, we can
    658     // always convert this into a logical shr, even if the shift amount is
    659     // variable.  The low bit of the shift cannot be an input sign bit unless
    660     // the shift amount is >= the size of the datatype, which is undefined.
    661     if (DemandedMask == 1) {
    662       // Perform the logical shift right.
    663       Instruction *NewVal = BinaryOperator::CreateLShr(
    664                         I->getOperand(0), I->getOperand(1), I->getName());
    665       return InsertNewInstWith(NewVal, *I);
    666     }
    667 
    668     // If the sign bit is the only bit demanded by this ashr, then there is no
    669     // need to do it, the shift doesn't change the high bit.
    670     if (DemandedMask.isSignBit())
    671       return I->getOperand(0);
    672 
    673     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
    674       uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
    675 
    676       // Signed shift right.
    677       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
    678       // If any of the "high bits" are demanded, we should set the sign bit as
    679       // demanded.
    680       if (DemandedMask.countLeadingZeros() <= ShiftAmt)
    681         DemandedMaskIn.setBit(BitWidth-1);
    682 
    683       // If the shift is exact, then it does demand the low bits (and knows that
    684       // they are zero).
    685       if (cast<AShrOperator>(I)->isExact())
    686         DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
    687 
    688       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
    689                                KnownZero, KnownOne, Depth+1))
    690         return I;
    691       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    692       // Compute the new bits that are at the top now.
    693       APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
    694       KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
    695       KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
    696 
    697       // Handle the sign bits.
    698       APInt SignBit(APInt::getSignBit(BitWidth));
    699       // Adjust to where it is now in the mask.
    700       SignBit = APIntOps::lshr(SignBit, ShiftAmt);
    701 
    702       // If the input sign bit is known to be zero, or if none of the top bits
    703       // are demanded, turn this into an unsigned shift right.
    704       if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
    705           (HighBits & ~DemandedMask) == HighBits) {
    706         // Perform the logical shift right.
    707         BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0),
    708                                                             SA, I->getName());
    709         NewVal->setIsExact(cast<BinaryOperator>(I)->isExact());
    710         return InsertNewInstWith(NewVal, *I);
    711       } else if ((KnownOne & SignBit) != 0) { // New bits are known one.
    712         KnownOne |= HighBits;
    713       }
    714     }
    715     break;
    716   case Instruction::SRem:
    717     if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
    718       // X % -1 demands all the bits because we don't want to introduce
    719       // INT_MIN % -1 (== undef) by accident.
    720       if (Rem->isAllOnesValue())
    721         break;
    722       APInt RA = Rem->getValue().abs();
    723       if (RA.isPowerOf2()) {
    724         if (DemandedMask.ult(RA))    // srem won't affect demanded bits
    725           return I->getOperand(0);
    726 
    727         APInt LowBits = RA - 1;
    728         APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
    729         if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
    730                                  LHSKnownZero, LHSKnownOne, Depth+1))
    731           return I;
    732 
    733         // The low bits of LHS are unchanged by the srem.
    734         KnownZero = LHSKnownZero & LowBits;
    735         KnownOne = LHSKnownOne & LowBits;
    736 
    737         // If LHS is non-negative or has all low bits zero, then the upper bits
    738         // are all zero.
    739         if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
    740           KnownZero |= ~LowBits;
    741 
    742         // If LHS is negative and not all low bits are zero, then the upper bits
    743         // are all one.
    744         if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0))
    745           KnownOne |= ~LowBits;
    746 
    747         assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    748       }
    749     }
    750 
    751     // The sign bit is the LHS's sign bit, except when the result of the
    752     // remainder is zero.
    753     if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
    754       APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
    755       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
    756       // If it's known zero, our sign bit is also zero.
    757       if (LHSKnownZero.isNegative())
    758         KnownZero.setBit(KnownZero.getBitWidth() - 1);
    759     }
    760     break;
    761   case Instruction::URem: {
    762     APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
    763     APInt AllOnes = APInt::getAllOnesValue(BitWidth);
    764     if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
    765                              KnownZero2, KnownOne2, Depth+1) ||
    766         SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
    767                              KnownZero2, KnownOne2, Depth+1))
    768       return I;
    769 
    770     unsigned Leaders = KnownZero2.countLeadingOnes();
    771     Leaders = std::max(Leaders,
    772                        KnownZero2.countLeadingOnes());
    773     KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
    774     break;
    775   }
    776   case Instruction::Call:
    777     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    778       switch (II->getIntrinsicID()) {
    779       default: break;
    780       case Intrinsic::bswap: {
    781         // If the only bits demanded come from one byte of the bswap result,
    782         // just shift the input byte into position to eliminate the bswap.
    783         unsigned NLZ = DemandedMask.countLeadingZeros();
    784         unsigned NTZ = DemandedMask.countTrailingZeros();
    785 
    786         // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
    787         // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
    788         // have 14 leading zeros, round to 8.
    789         NLZ &= ~7;
    790         NTZ &= ~7;
    791         // If we need exactly one byte, we can do this transformation.
    792         if (BitWidth-NLZ-NTZ == 8) {
    793           unsigned ResultBit = NTZ;
    794           unsigned InputBit = BitWidth-NTZ-8;
    795 
    796           // Replace this with either a left or right shift to get the byte into
    797           // the right place.
    798           Instruction *NewVal;
    799           if (InputBit > ResultBit)
    800             NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
    801                     ConstantInt::get(I->getType(), InputBit-ResultBit));
    802           else
    803             NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
    804                     ConstantInt::get(I->getType(), ResultBit-InputBit));
    805           NewVal->takeName(I);
    806           return InsertNewInstWith(NewVal, *I);
    807         }
    808 
    809         // TODO: Could compute known zero/one bits based on the input.
    810         break;
    811       }
    812       case Intrinsic::x86_sse42_crc32_64_64:
    813         KnownZero = APInt::getHighBitsSet(64, 32);
    814         return nullptr;
    815       }
    816     }
    817     computeKnownBits(V, KnownZero, KnownOne, Depth);
    818     break;
    819   }
    820 
    821   // If the client is only demanding bits that we know, return the known
    822   // constant.
    823   if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
    824     return Constant::getIntegerValue(VTy, KnownOne);
    825   return nullptr;
    826 }
    827 
    828 /// Helper routine of SimplifyDemandedUseBits. It tries to simplify
    829 /// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
    830 /// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
    831 /// of "C2-C1".
    832 ///
    833 /// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
    834 /// ..., bn}, without considering the specific value X is holding.
    835 /// This transformation is legal iff one of following conditions is hold:
    836 ///  1) All the bit in S are 0, in this case E1 == E2.
    837 ///  2) We don't care those bits in S, per the input DemandedMask.
    838 ///  3) Combination of 1) and 2). Some bits in S are 0, and we don't care the
    839 ///     rest bits.
    840 ///
    841 /// Currently we only test condition 2).
    842 ///
    843 /// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
    844 /// not successful.
    845 Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
    846   Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) {
    847 
    848   const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue();
    849   const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue();
    850   if (!ShlOp1 || !ShrOp1)
    851       return nullptr; // Noop.
    852 
    853   Value *VarX = Shr->getOperand(0);
    854   Type *Ty = VarX->getType();
    855   unsigned BitWidth = Ty->getIntegerBitWidth();
    856   if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
    857     return nullptr; // Undef.
    858 
    859   unsigned ShlAmt = ShlOp1.getZExtValue();
    860   unsigned ShrAmt = ShrOp1.getZExtValue();
    861 
    862   KnownOne.clearAllBits();
    863   KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1);
    864   KnownZero &= DemandedMask;
    865 
    866   APInt BitMask1(APInt::getAllOnesValue(BitWidth));
    867   APInt BitMask2(APInt::getAllOnesValue(BitWidth));
    868 
    869   bool isLshr = (Shr->getOpcode() == Instruction::LShr);
    870   BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
    871                       (BitMask1.ashr(ShrAmt) << ShlAmt);
    872 
    873   if (ShrAmt <= ShlAmt) {
    874     BitMask2 <<= (ShlAmt - ShrAmt);
    875   } else {
    876     BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
    877                         BitMask2.ashr(ShrAmt - ShlAmt);
    878   }
    879 
    880   // Check if condition-2 (see the comment to this function) is satified.
    881   if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
    882     if (ShrAmt == ShlAmt)
    883       return VarX;
    884 
    885     if (!Shr->hasOneUse())
    886       return nullptr;
    887 
    888     BinaryOperator *New;
    889     if (ShrAmt < ShlAmt) {
    890       Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
    891       New = BinaryOperator::CreateShl(VarX, Amt);
    892       BinaryOperator *Orig = cast<BinaryOperator>(Shl);
    893       New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
    894       New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
    895     } else {
    896       Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
    897       New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
    898                      BinaryOperator::CreateAShr(VarX, Amt);
    899       if (cast<BinaryOperator>(Shr)->isExact())
    900         New->setIsExact(true);
    901     }
    902 
    903     return InsertNewInstWith(New, *Shl);
    904   }
    905 
    906   return nullptr;
    907 }
    908 
    909 /// SimplifyDemandedVectorElts - The specified value produces a vector with
    910 /// any number of elements. DemandedElts contains the set of elements that are
    911 /// actually used by the caller.  This method analyzes which elements of the
    912 /// operand are undef and returns that information in UndefElts.
    913 ///
    914 /// If the information about demanded elements can be used to simplify the
    915 /// operation, the operation is simplified, then the resultant value is
    916 /// returned.  This returns null if no change was made.
    917 Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
    918                                                 APInt &UndefElts,
    919                                                 unsigned Depth) {
    920   unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
    921   APInt EltMask(APInt::getAllOnesValue(VWidth));
    922   assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
    923 
    924   if (isa<UndefValue>(V)) {
    925     // If the entire vector is undefined, just return this info.
    926     UndefElts = EltMask;
    927     return nullptr;
    928   }
    929 
    930   if (DemandedElts == 0) { // If nothing is demanded, provide undef.
    931     UndefElts = EltMask;
    932     return UndefValue::get(V->getType());
    933   }
    934 
    935   UndefElts = 0;
    936 
    937   // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
    938   if (Constant *C = dyn_cast<Constant>(V)) {
    939     // Check if this is identity. If so, return 0 since we are not simplifying
    940     // anything.
    941     if (DemandedElts.isAllOnesValue())
    942       return nullptr;
    943 
    944     Type *EltTy = cast<VectorType>(V->getType())->getElementType();
    945     Constant *Undef = UndefValue::get(EltTy);
    946 
    947     SmallVector<Constant*, 16> Elts;
    948     for (unsigned i = 0; i != VWidth; ++i) {
    949       if (!DemandedElts[i]) {   // If not demanded, set to undef.
    950         Elts.push_back(Undef);
    951         UndefElts.setBit(i);
    952         continue;
    953       }
    954 
    955       Constant *Elt = C->getAggregateElement(i);
    956       if (!Elt) return nullptr;
    957 
    958       if (isa<UndefValue>(Elt)) {   // Already undef.
    959         Elts.push_back(Undef);
    960         UndefElts.setBit(i);
    961       } else {                               // Otherwise, defined.
    962         Elts.push_back(Elt);
    963       }
    964     }
    965 
    966     // If we changed the constant, return it.
    967     Constant *NewCV = ConstantVector::get(Elts);
    968     return NewCV != C ? NewCV : nullptr;
    969   }
    970 
    971   // Limit search depth.
    972   if (Depth == 10)
    973     return nullptr;
    974 
    975   // If multiple users are using the root value, proceed with
    976   // simplification conservatively assuming that all elements
    977   // are needed.
    978   if (!V->hasOneUse()) {
    979     // Quit if we find multiple users of a non-root value though.
    980     // They'll be handled when it's their turn to be visited by
    981     // the main instcombine process.
    982     if (Depth != 0)
    983       // TODO: Just compute the UndefElts information recursively.
    984       return nullptr;
    985 
    986     // Conservatively assume that all elements are needed.
    987     DemandedElts = EltMask;
    988   }
    989 
    990   Instruction *I = dyn_cast<Instruction>(V);
    991   if (!I) return nullptr;        // Only analyze instructions.
    992 
    993   bool MadeChange = false;
    994   APInt UndefElts2(VWidth, 0);
    995   Value *TmpV;
    996   switch (I->getOpcode()) {
    997   default: break;
    998 
    999   case Instruction::InsertElement: {
   1000     // If this is a variable index, we don't know which element it overwrites.
   1001     // demand exactly the same input as we produce.
   1002     ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
   1003     if (!Idx) {
   1004       // Note that we can't propagate undef elt info, because we don't know
   1005       // which elt is getting updated.
   1006       TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
   1007                                         UndefElts2, Depth+1);
   1008       if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
   1009       break;
   1010     }
   1011 
   1012     // If this is inserting an element that isn't demanded, remove this
   1013     // insertelement.
   1014     unsigned IdxNo = Idx->getZExtValue();
   1015     if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
   1016       Worklist.Add(I);
   1017       return I->getOperand(0);
   1018     }
   1019 
   1020     // Otherwise, the element inserted overwrites whatever was there, so the
   1021     // input demanded set is simpler than the output set.
   1022     APInt DemandedElts2 = DemandedElts;
   1023     DemandedElts2.clearBit(IdxNo);
   1024     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
   1025                                       UndefElts, Depth+1);
   1026     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
   1027 
   1028     // The inserted element is defined.
   1029     UndefElts.clearBit(IdxNo);
   1030     break;
   1031   }
   1032   case Instruction::ShuffleVector: {
   1033     ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
   1034     uint64_t LHSVWidth =
   1035       cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
   1036     APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
   1037     for (unsigned i = 0; i < VWidth; i++) {
   1038       if (DemandedElts[i]) {
   1039         unsigned MaskVal = Shuffle->getMaskValue(i);
   1040         if (MaskVal != -1u) {
   1041           assert(MaskVal < LHSVWidth * 2 &&
   1042                  "shufflevector mask index out of range!");
   1043           if (MaskVal < LHSVWidth)
   1044             LeftDemanded.setBit(MaskVal);
   1045           else
   1046             RightDemanded.setBit(MaskVal - LHSVWidth);
   1047         }
   1048       }
   1049     }
   1050 
   1051     APInt UndefElts4(LHSVWidth, 0);
   1052     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
   1053                                       UndefElts4, Depth+1);
   1054     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
   1055 
   1056     APInt UndefElts3(LHSVWidth, 0);
   1057     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
   1058                                       UndefElts3, Depth+1);
   1059     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
   1060 
   1061     bool NewUndefElts = false;
   1062     for (unsigned i = 0; i < VWidth; i++) {
   1063       unsigned MaskVal = Shuffle->getMaskValue(i);
   1064       if (MaskVal == -1u) {
   1065         UndefElts.setBit(i);
   1066       } else if (!DemandedElts[i]) {
   1067         NewUndefElts = true;
   1068         UndefElts.setBit(i);
   1069       } else if (MaskVal < LHSVWidth) {
   1070         if (UndefElts4[MaskVal]) {
   1071           NewUndefElts = true;
   1072           UndefElts.setBit(i);
   1073         }
   1074       } else {
   1075         if (UndefElts3[MaskVal - LHSVWidth]) {
   1076           NewUndefElts = true;
   1077           UndefElts.setBit(i);
   1078         }
   1079       }
   1080     }
   1081 
   1082     if (NewUndefElts) {
   1083       // Add additional discovered undefs.
   1084       SmallVector<Constant*, 16> Elts;
   1085       for (unsigned i = 0; i < VWidth; ++i) {
   1086         if (UndefElts[i])
   1087           Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
   1088         else
   1089           Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
   1090                                           Shuffle->getMaskValue(i)));
   1091       }
   1092       I->setOperand(2, ConstantVector::get(Elts));
   1093       MadeChange = true;
   1094     }
   1095     break;
   1096   }
   1097   case Instruction::Select: {
   1098     APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
   1099     if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
   1100       for (unsigned i = 0; i < VWidth; i++) {
   1101         if (CV->getAggregateElement(i)->isNullValue())
   1102           LeftDemanded.clearBit(i);
   1103         else
   1104           RightDemanded.clearBit(i);
   1105       }
   1106     }
   1107 
   1108     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded,
   1109                                       UndefElts, Depth+1);
   1110     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
   1111 
   1112     TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
   1113                                       UndefElts2, Depth+1);
   1114     if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
   1115 
   1116     // Output elements are undefined if both are undefined.
   1117     UndefElts &= UndefElts2;
   1118     break;
   1119   }
   1120   case Instruction::BitCast: {
   1121     // Vector->vector casts only.
   1122     VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
   1123     if (!VTy) break;
   1124     unsigned InVWidth = VTy->getNumElements();
   1125     APInt InputDemandedElts(InVWidth, 0);
   1126     unsigned Ratio;
   1127 
   1128     if (VWidth == InVWidth) {
   1129       // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
   1130       // elements as are demanded of us.
   1131       Ratio = 1;
   1132       InputDemandedElts = DemandedElts;
   1133     } else if (VWidth > InVWidth) {
   1134       // Untested so far.
   1135       break;
   1136 
   1137       // If there are more elements in the result than there are in the source,
   1138       // then an input element is live if any of the corresponding output
   1139       // elements are live.
   1140       Ratio = VWidth/InVWidth;
   1141       for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
   1142         if (DemandedElts[OutIdx])
   1143           InputDemandedElts.setBit(OutIdx/Ratio);
   1144       }
   1145     } else {
   1146       // Untested so far.
   1147       break;
   1148 
   1149       // If there are more elements in the source than there are in the result,
   1150       // then an input element is live if the corresponding output element is
   1151       // live.
   1152       Ratio = InVWidth/VWidth;
   1153       for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
   1154         if (DemandedElts[InIdx/Ratio])
   1155           InputDemandedElts.setBit(InIdx);
   1156     }
   1157 
   1158     // div/rem demand all inputs, because they don't want divide by zero.
   1159     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
   1160                                       UndefElts2, Depth+1);
   1161     if (TmpV) {
   1162       I->setOperand(0, TmpV);
   1163       MadeChange = true;
   1164     }
   1165 
   1166     UndefElts = UndefElts2;
   1167     if (VWidth > InVWidth) {
   1168       llvm_unreachable("Unimp");
   1169       // If there are more elements in the result than there are in the source,
   1170       // then an output element is undef if the corresponding input element is
   1171       // undef.
   1172       for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
   1173         if (UndefElts2[OutIdx/Ratio])
   1174           UndefElts.setBit(OutIdx);
   1175     } else if (VWidth < InVWidth) {
   1176       llvm_unreachable("Unimp");
   1177       // If there are more elements in the source than there are in the result,
   1178       // then a result element is undef if all of the corresponding input
   1179       // elements are undef.
   1180       UndefElts = ~0ULL >> (64-VWidth);  // Start out all undef.
   1181       for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
   1182         if (!UndefElts2[InIdx])            // Not undef?
   1183           UndefElts.clearBit(InIdx/Ratio);    // Clear undef bit.
   1184     }
   1185     break;
   1186   }
   1187   case Instruction::And:
   1188   case Instruction::Or:
   1189   case Instruction::Xor:
   1190   case Instruction::Add:
   1191   case Instruction::Sub:
   1192   case Instruction::Mul:
   1193     // div/rem demand all inputs, because they don't want divide by zero.
   1194     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
   1195                                       UndefElts, Depth+1);
   1196     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
   1197     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
   1198                                       UndefElts2, Depth+1);
   1199     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
   1200 
   1201     // Output elements are undefined if both are undefined.  Consider things
   1202     // like undef&0.  The result is known zero, not undef.
   1203     UndefElts &= UndefElts2;
   1204     break;
   1205   case Instruction::FPTrunc:
   1206   case Instruction::FPExt:
   1207     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
   1208                                       UndefElts, Depth+1);
   1209     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
   1210     break;
   1211 
   1212   case Instruction::Call: {
   1213     IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
   1214     if (!II) break;
   1215     switch (II->getIntrinsicID()) {
   1216     default: break;
   1217 
   1218     // Binary vector operations that work column-wise.  A dest element is a
   1219     // function of the corresponding input elements from the two inputs.
   1220     case Intrinsic::x86_sse_sub_ss:
   1221     case Intrinsic::x86_sse_mul_ss:
   1222     case Intrinsic::x86_sse_min_ss:
   1223     case Intrinsic::x86_sse_max_ss:
   1224     case Intrinsic::x86_sse2_sub_sd:
   1225     case Intrinsic::x86_sse2_mul_sd:
   1226     case Intrinsic::x86_sse2_min_sd:
   1227     case Intrinsic::x86_sse2_max_sd:
   1228       TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
   1229                                         UndefElts, Depth+1);
   1230       if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
   1231       TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
   1232                                         UndefElts2, Depth+1);
   1233       if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
   1234 
   1235       // If only the low elt is demanded and this is a scalarizable intrinsic,
   1236       // scalarize it now.
   1237       if (DemandedElts == 1) {
   1238         switch (II->getIntrinsicID()) {
   1239         default: break;
   1240         case Intrinsic::x86_sse_sub_ss:
   1241         case Intrinsic::x86_sse_mul_ss:
   1242         case Intrinsic::x86_sse2_sub_sd:
   1243         case Intrinsic::x86_sse2_mul_sd:
   1244           // TODO: Lower MIN/MAX/ABS/etc
   1245           Value *LHS = II->getArgOperand(0);
   1246           Value *RHS = II->getArgOperand(1);
   1247           // Extract the element as scalars.
   1248           LHS = InsertNewInstWith(ExtractElementInst::Create(LHS,
   1249             ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
   1250           RHS = InsertNewInstWith(ExtractElementInst::Create(RHS,
   1251             ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
   1252 
   1253           switch (II->getIntrinsicID()) {
   1254           default: llvm_unreachable("Case stmts out of sync!");
   1255           case Intrinsic::x86_sse_sub_ss:
   1256           case Intrinsic::x86_sse2_sub_sd:
   1257             TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS,
   1258                                                         II->getName()), *II);
   1259             break;
   1260           case Intrinsic::x86_sse_mul_ss:
   1261           case Intrinsic::x86_sse2_mul_sd:
   1262             TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS,
   1263                                                          II->getName()), *II);
   1264             break;
   1265           }
   1266 
   1267           Instruction *New =
   1268             InsertElementInst::Create(
   1269               UndefValue::get(II->getType()), TmpV,
   1270               ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
   1271                                       II->getName());
   1272           InsertNewInstWith(New, *II);
   1273           return New;
   1274         }
   1275       }
   1276 
   1277       // Output elements are undefined if both are undefined.  Consider things
   1278       // like undef&0.  The result is known zero, not undef.
   1279       UndefElts &= UndefElts2;
   1280       break;
   1281     }
   1282     break;
   1283   }
   1284   }
   1285   return MadeChange ? I : nullptr;
   1286 }
   1287