Home | History | Annotate | Download | only in InstCombine
      1 //===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains logic for simplifying instructions based on information
     11 // about how they are used.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "InstCombineInternal.h"
     16 #include "llvm/IR/IntrinsicInst.h"
     17 #include "llvm/IR/PatternMatch.h"
     18 
     19 using namespace llvm;
     20 using namespace llvm::PatternMatch;
     21 
     22 #define DEBUG_TYPE "instcombine"
     23 
     24 /// ShrinkDemandedConstant - Check to see if the specified operand of the
     25 /// specified instruction is a constant integer.  If so, check to see if there
     26 /// are any bits set in the constant that are not demanded.  If so, shrink the
     27 /// constant and return true.
     28 static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
     29                                    APInt Demanded) {
     30   assert(I && "No instruction?");
     31   assert(OpNo < I->getNumOperands() && "Operand index too large");
     32 
     33   // If the operand is not a constant integer, nothing to do.
     34   ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo));
     35   if (!OpC) return false;
     36 
     37   // If there are no bits set that aren't demanded, nothing to do.
     38   Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
     39   if ((~Demanded & OpC->getValue()) == 0)
     40     return false;
     41 
     42   // This instruction is producing bits that are not demanded. Shrink the RHS.
     43   Demanded &= OpC->getValue();
     44   I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
     45 
     46   // If either 'nsw' or 'nuw' is set and the constant is negative,
     47   // removing *any* bits from the constant could make overflow occur.
     48   // Remove 'nsw' and 'nuw' from the instruction in this case.
     49   if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) {
     50     assert(OBO->getOpcode() == Instruction::Add);
     51     if (OBO->hasNoSignedWrap() || OBO->hasNoUnsignedWrap()) {
     52       if (OpC->getValue().isNegative()) {
     53         cast<BinaryOperator>(OBO)->setHasNoSignedWrap(false);
     54         cast<BinaryOperator>(OBO)->setHasNoUnsignedWrap(false);
     55       }
     56     }
     57   }
     58 
     59   return true;
     60 }
     61 
     62 
     63 
     64 /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
     65 /// SimplifyDemandedBits knows about.  See if the instruction has any
     66 /// properties that allow us to simplify its operands.
     67 bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
     68   unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
     69   APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
     70   APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
     71 
     72   Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne,
     73                                      0, &Inst);
     74   if (!V) return false;
     75   if (V == &Inst) return true;
     76   ReplaceInstUsesWith(Inst, V);
     77   return true;
     78 }
     79 
     80 /// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
     81 /// specified instruction operand if possible, updating it in place.  It returns
     82 /// true if it made any change and false otherwise.
     83 bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
     84                                         APInt &KnownZero, APInt &KnownOne,
     85                                         unsigned Depth) {
     86   Value *NewVal =
     87       SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero, KnownOne, Depth,
     88                               dyn_cast<Instruction>(U.getUser()));
     89   if (!NewVal) return false;
     90   U = NewVal;
     91   return true;
     92 }
     93 
     94 
     95 /// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
     96 /// value based on the demanded bits.  When this function is called, it is known
     97 /// that only the bits set in DemandedMask of the result of V are ever used
     98 /// downstream. Consequently, depending on the mask and V, it may be possible
     99 /// to replace V with a constant or one of its operands. In such cases, this
    100 /// function does the replacement and returns true. In all other cases, it
    101 /// returns false after analyzing the expression and setting KnownOne and known
    102 /// to be one in the expression.  KnownZero contains all the bits that are known
    103 /// to be zero in the expression. These are provided to potentially allow the
    104 /// caller (which might recursively be SimplifyDemandedBits itself) to simplify
    105 /// the expression. KnownOne and KnownZero always follow the invariant that
    106 /// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
    107 /// the bits in KnownOne and KnownZero may only be accurate for those bits set
    108 /// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
    109 /// and KnownOne must all be the same.
    110 ///
    111 /// This returns null if it did not change anything and it permits no
    112 /// simplification.  This returns V itself if it did some simplification of V's
    113 /// operands based on the information about what bits are demanded. This returns
    114 /// some other non-null value if it found out that V is equal to another value
    115 /// in the context where the specified bits are demanded, but not for all users.
    116 Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
    117                                              APInt &KnownZero, APInt &KnownOne,
    118                                              unsigned Depth,
    119                                              Instruction *CxtI) {
    120   assert(V != nullptr && "Null pointer of Value???");
    121   assert(Depth <= 6 && "Limit Search Depth");
    122   uint32_t BitWidth = DemandedMask.getBitWidth();
    123   Type *VTy = V->getType();
    124   assert(
    125       (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
    126       KnownZero.getBitWidth() == BitWidth &&
    127       KnownOne.getBitWidth() == BitWidth &&
    128       "Value *V, DemandedMask, KnownZero and KnownOne "
    129       "must have same BitWidth");
    130   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
    131     // We know all of the bits for a constant!
    132     KnownOne = CI->getValue() & DemandedMask;
    133     KnownZero = ~KnownOne & DemandedMask;
    134     return nullptr;
    135   }
    136   if (isa<ConstantPointerNull>(V)) {
    137     // We know all of the bits for a constant!
    138     KnownOne.clearAllBits();
    139     KnownZero = DemandedMask;
    140     return nullptr;
    141   }
    142 
    143   KnownZero.clearAllBits();
    144   KnownOne.clearAllBits();
    145   if (DemandedMask == 0) {   // Not demanding any bits from V.
    146     if (isa<UndefValue>(V))
    147       return nullptr;
    148     return UndefValue::get(VTy);
    149   }
    150 
    151   if (Depth == 6)        // Limit search depth.
    152     return nullptr;
    153 
    154   APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
    155   APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
    156 
    157   Instruction *I = dyn_cast<Instruction>(V);
    158   if (!I) {
    159     computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
    160     return nullptr;        // Only analyze instructions.
    161   }
    162 
    163   // If there are multiple uses of this value and we aren't at the root, then
    164   // we can't do any simplifications of the operands, because DemandedMask
    165   // only reflects the bits demanded by *one* of the users.
    166   if (Depth != 0 && !I->hasOneUse()) {
    167     // Despite the fact that we can't simplify this instruction in all User's
    168     // context, we can at least compute the knownzero/knownone bits, and we can
    169     // do simplifications that apply to *just* the one user if we know that
    170     // this instruction has a simpler value in that context.
    171     if (I->getOpcode() == Instruction::And) {
    172       // If either the LHS or the RHS are Zero, the result is zero.
    173       computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
    174                        CxtI);
    175       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
    176                        CxtI);
    177 
    178       // If all of the demanded bits are known 1 on one side, return the other.
    179       // These bits cannot contribute to the result of the 'and' in this
    180       // context.
    181       if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
    182           (DemandedMask & ~LHSKnownZero))
    183         return I->getOperand(0);
    184       if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
    185           (DemandedMask & ~RHSKnownZero))
    186         return I->getOperand(1);
    187 
    188       // If all of the demanded bits in the inputs are known zeros, return zero.
    189       if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
    190         return Constant::getNullValue(VTy);
    191 
    192     } else if (I->getOpcode() == Instruction::Or) {
    193       // We can simplify (X|Y) -> X or Y in the user's context if we know that
    194       // only bits from X or Y are demanded.
    195 
    196       // If either the LHS or the RHS are One, the result is One.
    197       computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
    198                        CxtI);
    199       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
    200                        CxtI);
    201 
    202       // If all of the demanded bits are known zero on one side, return the
    203       // other.  These bits cannot contribute to the result of the 'or' in this
    204       // context.
    205       if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
    206           (DemandedMask & ~LHSKnownOne))
    207         return I->getOperand(0);
    208       if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
    209           (DemandedMask & ~RHSKnownOne))
    210         return I->getOperand(1);
    211 
    212       // If all of the potentially set bits on one side are known to be set on
    213       // the other side, just use the 'other' side.
    214       if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
    215           (DemandedMask & (~RHSKnownZero)))
    216         return I->getOperand(0);
    217       if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
    218           (DemandedMask & (~LHSKnownZero)))
    219         return I->getOperand(1);
    220     } else if (I->getOpcode() == Instruction::Xor) {
    221       // We can simplify (X^Y) -> X or Y in the user's context if we know that
    222       // only bits from X or Y are demanded.
    223 
    224       computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
    225                        CxtI);
    226       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
    227                        CxtI);
    228 
    229       // If all of the demanded bits are known zero on one side, return the
    230       // other.
    231       if ((DemandedMask & RHSKnownZero) == DemandedMask)
    232         return I->getOperand(0);
    233       if ((DemandedMask & LHSKnownZero) == DemandedMask)
    234         return I->getOperand(1);
    235     }
    236 
    237     // Compute the KnownZero/KnownOne bits to simplify things downstream.
    238     computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
    239     return nullptr;
    240   }
    241 
    242   // If this is the root being simplified, allow it to have multiple uses,
    243   // just set the DemandedMask to all bits so that we can try to simplify the
    244   // operands.  This allows visitTruncInst (for example) to simplify the
    245   // operand of a trunc without duplicating all the logic below.
    246   if (Depth == 0 && !V->hasOneUse())
    247     DemandedMask = APInt::getAllOnesValue(BitWidth);
    248 
    249   switch (I->getOpcode()) {
    250   default:
    251     computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
    252     break;
    253   case Instruction::And:
    254     // If either the LHS or the RHS are Zero, the result is zero.
    255     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
    256                              RHSKnownOne, Depth + 1) ||
    257         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
    258                              LHSKnownZero, LHSKnownOne, Depth + 1))
    259       return I;
    260     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
    261     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
    262 
    263     // If the client is only demanding bits that we know, return the known
    264     // constant.
    265     if ((DemandedMask & ((RHSKnownZero | LHSKnownZero)|
    266                          (RHSKnownOne & LHSKnownOne))) == DemandedMask)
    267       return Constant::getIntegerValue(VTy, RHSKnownOne & LHSKnownOne);
    268 
    269     // If all of the demanded bits are known 1 on one side, return the other.
    270     // These bits cannot contribute to the result of the 'and'.
    271     if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
    272         (DemandedMask & ~LHSKnownZero))
    273       return I->getOperand(0);
    274     if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
    275         (DemandedMask & ~RHSKnownZero))
    276       return I->getOperand(1);
    277 
    278     // If all of the demanded bits in the inputs are known zeros, return zero.
    279     if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
    280       return Constant::getNullValue(VTy);
    281 
    282     // If the RHS is a constant, see if we can simplify it.
    283     if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
    284       return I;
    285 
    286     // Output known-1 bits are only known if set in both the LHS & RHS.
    287     KnownOne = RHSKnownOne & LHSKnownOne;
    288     // Output known-0 are known to be clear if zero in either the LHS | RHS.
    289     KnownZero = RHSKnownZero | LHSKnownZero;
    290     break;
    291   case Instruction::Or:
    292     // If either the LHS or the RHS are One, the result is One.
    293     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
    294                              RHSKnownOne, Depth + 1) ||
    295         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
    296                              LHSKnownZero, LHSKnownOne, Depth + 1))
    297       return I;
    298     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
    299     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
    300 
    301     // If the client is only demanding bits that we know, return the known
    302     // constant.
    303     if ((DemandedMask & ((RHSKnownZero & LHSKnownZero)|
    304                          (RHSKnownOne | LHSKnownOne))) == DemandedMask)
    305       return Constant::getIntegerValue(VTy, RHSKnownOne | LHSKnownOne);
    306 
    307     // If all of the demanded bits are known zero on one side, return the other.
    308     // These bits cannot contribute to the result of the 'or'.
    309     if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
    310         (DemandedMask & ~LHSKnownOne))
    311       return I->getOperand(0);
    312     if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
    313         (DemandedMask & ~RHSKnownOne))
    314       return I->getOperand(1);
    315 
    316     // If all of the potentially set bits on one side are known to be set on
    317     // the other side, just use the 'other' side.
    318     if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
    319         (DemandedMask & (~RHSKnownZero)))
    320       return I->getOperand(0);
    321     if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
    322         (DemandedMask & (~LHSKnownZero)))
    323       return I->getOperand(1);
    324 
    325     // If the RHS is a constant, see if we can simplify it.
    326     if (ShrinkDemandedConstant(I, 1, DemandedMask))
    327       return I;
    328 
    329     // Output known-0 bits are only known if clear in both the LHS & RHS.
    330     KnownZero = RHSKnownZero & LHSKnownZero;
    331     // Output known-1 are known to be set if set in either the LHS | RHS.
    332     KnownOne = RHSKnownOne | LHSKnownOne;
    333     break;
    334   case Instruction::Xor: {
    335     if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
    336                              RHSKnownOne, Depth + 1) ||
    337         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, LHSKnownZero,
    338                              LHSKnownOne, Depth + 1))
    339       return I;
    340     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
    341     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
    342 
    343     // Output known-0 bits are known if clear or set in both the LHS & RHS.
    344     APInt IKnownZero = (RHSKnownZero & LHSKnownZero) |
    345                        (RHSKnownOne & LHSKnownOne);
    346     // Output known-1 are known to be set if set in only one of the LHS, RHS.
    347     APInt IKnownOne =  (RHSKnownZero & LHSKnownOne) |
    348                        (RHSKnownOne & LHSKnownZero);
    349 
    350     // If the client is only demanding bits that we know, return the known
    351     // constant.
    352     if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
    353       return Constant::getIntegerValue(VTy, IKnownOne);
    354 
    355     // If all of the demanded bits are known zero on one side, return the other.
    356     // These bits cannot contribute to the result of the 'xor'.
    357     if ((DemandedMask & RHSKnownZero) == DemandedMask)
    358       return I->getOperand(0);
    359     if ((DemandedMask & LHSKnownZero) == DemandedMask)
    360       return I->getOperand(1);
    361 
    362     // If all of the demanded bits are known to be zero on one side or the
    363     // other, turn this into an *inclusive* or.
    364     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    365     if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
    366       Instruction *Or =
    367         BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
    368                                  I->getName());
    369       return InsertNewInstWith(Or, *I);
    370     }
    371 
    372     // If all of the demanded bits on one side are known, and all of the set
    373     // bits on that side are also known to be set on the other side, turn this
    374     // into an AND, as we know the bits will be cleared.
    375     //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
    376     if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
    377       // all known
    378       if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
    379         Constant *AndC = Constant::getIntegerValue(VTy,
    380                                                    ~RHSKnownOne & DemandedMask);
    381         Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
    382         return InsertNewInstWith(And, *I);
    383       }
    384     }
    385 
    386     // If the RHS is a constant, see if we can simplify it.
    387     // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
    388     if (ShrinkDemandedConstant(I, 1, DemandedMask))
    389       return I;
    390 
    391     // If our LHS is an 'and' and if it has one use, and if any of the bits we
    392     // are flipping are known to be set, then the xor is just resetting those
    393     // bits to zero.  We can just knock out bits from the 'and' and the 'xor',
    394     // simplifying both of them.
    395     if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
    396       if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
    397           isa<ConstantInt>(I->getOperand(1)) &&
    398           isa<ConstantInt>(LHSInst->getOperand(1)) &&
    399           (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
    400         ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
    401         ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
    402         APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
    403 
    404         Constant *AndC =
    405           ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
    406         Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
    407         InsertNewInstWith(NewAnd, *I);
    408 
    409         Constant *XorC =
    410           ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
    411         Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
    412         return InsertNewInstWith(NewXor, *I);
    413       }
    414 
    415     // Output known-0 bits are known if clear or set in both the LHS & RHS.
    416     KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne);
    417     // Output known-1 are known to be set if set in only one of the LHS, RHS.
    418     KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero);
    419     break;
    420   }
    421   case Instruction::Select:
    422     if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero,
    423                              RHSKnownOne, Depth + 1) ||
    424         SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero,
    425                              LHSKnownOne, Depth + 1))
    426       return I;
    427     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
    428     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
    429 
    430     // If the operands are constants, see if we can simplify them.
    431     if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
    432         ShrinkDemandedConstant(I, 2, DemandedMask))
    433       return I;
    434 
    435     // Only known if known in both the LHS and RHS.
    436     KnownOne = RHSKnownOne & LHSKnownOne;
    437     KnownZero = RHSKnownZero & LHSKnownZero;
    438     break;
    439   case Instruction::Trunc: {
    440     unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
    441     DemandedMask = DemandedMask.zext(truncBf);
    442     KnownZero = KnownZero.zext(truncBf);
    443     KnownOne = KnownOne.zext(truncBf);
    444     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
    445                              KnownOne, Depth + 1))
    446       return I;
    447     DemandedMask = DemandedMask.trunc(BitWidth);
    448     KnownZero = KnownZero.trunc(BitWidth);
    449     KnownOne = KnownOne.trunc(BitWidth);
    450     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    451     break;
    452   }
    453   case Instruction::BitCast:
    454     if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
    455       return nullptr;  // vector->int or fp->int?
    456 
    457     if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
    458       if (VectorType *SrcVTy =
    459             dyn_cast<VectorType>(I->getOperand(0)->getType())) {
    460         if (DstVTy->getNumElements() != SrcVTy->getNumElements())
    461           // Don't touch a bitcast between vectors of different element counts.
    462           return nullptr;
    463       } else
    464         // Don't touch a scalar-to-vector bitcast.
    465         return nullptr;
    466     } else if (I->getOperand(0)->getType()->isVectorTy())
    467       // Don't touch a vector-to-scalar bitcast.
    468       return nullptr;
    469 
    470     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
    471                              KnownOne, Depth + 1))
    472       return I;
    473     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    474     break;
    475   case Instruction::ZExt: {
    476     // Compute the bits in the result that are not present in the input.
    477     unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
    478 
    479     DemandedMask = DemandedMask.trunc(SrcBitWidth);
    480     KnownZero = KnownZero.trunc(SrcBitWidth);
    481     KnownOne = KnownOne.trunc(SrcBitWidth);
    482     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
    483                              KnownOne, Depth + 1))
    484       return I;
    485     DemandedMask = DemandedMask.zext(BitWidth);
    486     KnownZero = KnownZero.zext(BitWidth);
    487     KnownOne = KnownOne.zext(BitWidth);
    488     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    489     // The top bits are known to be zero.
    490     KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
    491     break;
    492   }
    493   case Instruction::SExt: {
    494     // Compute the bits in the result that are not present in the input.
    495     unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
    496 
    497     APInt InputDemandedBits = DemandedMask &
    498                               APInt::getLowBitsSet(BitWidth, SrcBitWidth);
    499 
    500     APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
    501     // If any of the sign extended bits are demanded, we know that the sign
    502     // bit is demanded.
    503     if ((NewBits & DemandedMask) != 0)
    504       InputDemandedBits.setBit(SrcBitWidth-1);
    505 
    506     InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
    507     KnownZero = KnownZero.trunc(SrcBitWidth);
    508     KnownOne = KnownOne.trunc(SrcBitWidth);
    509     if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, KnownZero,
    510                              KnownOne, Depth + 1))
    511       return I;
    512     InputDemandedBits = InputDemandedBits.zext(BitWidth);
    513     KnownZero = KnownZero.zext(BitWidth);
    514     KnownOne = KnownOne.zext(BitWidth);
    515     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    516 
    517     // If the sign bit of the input is known set or clear, then we know the
    518     // top bits of the result.
    519 
    520     // If the input sign bit is known zero, or if the NewBits are not demanded
    521     // convert this into a zero extension.
    522     if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
    523       // Convert to ZExt cast
    524       CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
    525       return InsertNewInstWith(NewCast, *I);
    526     } else if (KnownOne[SrcBitWidth-1]) {    // Input sign bit known set
    527       KnownOne |= NewBits;
    528     }
    529     break;
    530   }
    531   case Instruction::Add: {
    532     // Figure out what the input bits are.  If the top bits of the and result
    533     // are not demanded, then the add doesn't demand them from its input
    534     // either.
    535     unsigned NLZ = DemandedMask.countLeadingZeros();
    536 
    537     // If there is a constant on the RHS, there are a variety of xformations
    538     // we can do.
    539     if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
    540       // If null, this should be simplified elsewhere.  Some of the xforms here
    541       // won't work if the RHS is zero.
    542       if (RHS->isZero())
    543         break;
    544 
    545       // If the top bit of the output is demanded, demand everything from the
    546       // input.  Otherwise, we demand all the input bits except NLZ top bits.
    547       APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
    548 
    549       // Find information about known zero/one bits in the input.
    550       if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
    551                                LHSKnownZero, LHSKnownOne, Depth + 1))
    552         return I;
    553 
    554       // If the RHS of the add has bits set that can't affect the input, reduce
    555       // the constant.
    556       if (ShrinkDemandedConstant(I, 1, InDemandedBits))
    557         return I;
    558 
    559       // Avoid excess work.
    560       if (LHSKnownZero == 0 && LHSKnownOne == 0)
    561         break;
    562 
    563       // Turn it into OR if input bits are zero.
    564       if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
    565         Instruction *Or =
    566           BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
    567                                    I->getName());
    568         return InsertNewInstWith(Or, *I);
    569       }
    570 
    571       // We can say something about the output known-zero and known-one bits,
    572       // depending on potential carries from the input constant and the
    573       // unknowns.  For example if the LHS is known to have at most the 0x0F0F0
    574       // bits set and the RHS constant is 0x01001, then we know we have a known
    575       // one mask of 0x00001 and a known zero mask of 0xE0F0E.
    576 
    577       // To compute this, we first compute the potential carry bits.  These are
    578       // the bits which may be modified.  I'm not aware of a better way to do
    579       // this scan.
    580       const APInt &RHSVal = RHS->getValue();
    581       APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
    582 
    583       // Now that we know which bits have carries, compute the known-1/0 sets.
    584 
    585       // Bits are known one if they are known zero in one operand and one in the
    586       // other, and there is no input carry.
    587       KnownOne = ((LHSKnownZero & RHSVal) |
    588                   (LHSKnownOne & ~RHSVal)) & ~CarryBits;
    589 
    590       // Bits are known zero if they are known zero in both operands and there
    591       // is no input carry.
    592       KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
    593     } else {
    594       // If the high-bits of this ADD are not demanded, then it does not demand
    595       // the high bits of its LHS or RHS.
    596       if (DemandedMask[BitWidth-1] == 0) {
    597         // Right fill the mask of bits for this ADD to demand the most
    598         // significant bit and all those below it.
    599         APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
    600         if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
    601                                  LHSKnownZero, LHSKnownOne, Depth + 1) ||
    602             SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
    603                                  LHSKnownZero, LHSKnownOne, Depth + 1))
    604           return I;
    605       }
    606     }
    607     break;
    608   }
    609   case Instruction::Sub:
    610     // If the high-bits of this SUB are not demanded, then it does not demand
    611     // the high bits of its LHS or RHS.
    612     if (DemandedMask[BitWidth-1] == 0) {
    613       // Right fill the mask of bits for this SUB to demand the most
    614       // significant bit and all those below it.
    615       uint32_t NLZ = DemandedMask.countLeadingZeros();
    616       APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
    617       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
    618                                LHSKnownZero, LHSKnownOne, Depth + 1) ||
    619           SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
    620                                LHSKnownZero, LHSKnownOne, Depth + 1))
    621         return I;
    622     }
    623 
    624     // Otherwise just hand the sub off to computeKnownBits to fill in
    625     // the known zeros and ones.
    626     computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
    627 
    628     // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
    629     // zero.
    630     if (ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(0))) {
    631       APInt I0 = C0->getValue();
    632       if ((I0 + 1).isPowerOf2() && (I0 | KnownZero).isAllOnesValue()) {
    633         Instruction *Xor = BinaryOperator::CreateXor(I->getOperand(1), C0);
    634         return InsertNewInstWith(Xor, *I);
    635       }
    636     }
    637     break;
    638   case Instruction::Shl:
    639     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
    640       {
    641         Value *VarX; ConstantInt *C1;
    642         if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) {
    643           Instruction *Shr = cast<Instruction>(I->getOperand(0));
    644           Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask,
    645                                                 KnownZero, KnownOne);
    646           if (R)
    647             return R;
    648         }
    649       }
    650 
    651       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
    652       APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
    653 
    654       // If the shift is NUW/NSW, then it does demand the high bits.
    655       ShlOperator *IOp = cast<ShlOperator>(I);
    656       if (IOp->hasNoSignedWrap())
    657         DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
    658       else if (IOp->hasNoUnsignedWrap())
    659         DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
    660 
    661       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
    662                                KnownOne, Depth + 1))
    663         return I;
    664       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    665       KnownZero <<= ShiftAmt;
    666       KnownOne  <<= ShiftAmt;
    667       // low bits known zero.
    668       if (ShiftAmt)
    669         KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
    670     }
    671     break;
    672   case Instruction::LShr:
    673     // For a logical shift right
    674     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
    675       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
    676 
    677       // Unsigned shift right.
    678       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
    679 
    680       // If the shift is exact, then it does demand the low bits (and knows that
    681       // they are zero).
    682       if (cast<LShrOperator>(I)->isExact())
    683         DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
    684 
    685       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
    686                                KnownOne, Depth + 1))
    687         return I;
    688       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    689       KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
    690       KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
    691       if (ShiftAmt) {
    692         // Compute the new bits that are at the top now.
    693         APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
    694         KnownZero |= HighBits;  // high bits known zero.
    695       }
    696     }
    697     break;
    698   case Instruction::AShr:
    699     // If this is an arithmetic shift right and only the low-bit is set, we can
    700     // always convert this into a logical shr, even if the shift amount is
    701     // variable.  The low bit of the shift cannot be an input sign bit unless
    702     // the shift amount is >= the size of the datatype, which is undefined.
    703     if (DemandedMask == 1) {
    704       // Perform the logical shift right.
    705       Instruction *NewVal = BinaryOperator::CreateLShr(
    706                         I->getOperand(0), I->getOperand(1), I->getName());
    707       return InsertNewInstWith(NewVal, *I);
    708     }
    709 
    710     // If the sign bit is the only bit demanded by this ashr, then there is no
    711     // need to do it, the shift doesn't change the high bit.
    712     if (DemandedMask.isSignBit())
    713       return I->getOperand(0);
    714 
    715     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
    716       uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
    717 
    718       // Signed shift right.
    719       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
    720       // If any of the "high bits" are demanded, we should set the sign bit as
    721       // demanded.
    722       if (DemandedMask.countLeadingZeros() <= ShiftAmt)
    723         DemandedMaskIn.setBit(BitWidth-1);
    724 
    725       // If the shift is exact, then it does demand the low bits (and knows that
    726       // they are zero).
    727       if (cast<AShrOperator>(I)->isExact())
    728         DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
    729 
    730       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
    731                                KnownOne, Depth + 1))
    732         return I;
    733       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    734       // Compute the new bits that are at the top now.
    735       APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
    736       KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
    737       KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
    738 
    739       // Handle the sign bits.
    740       APInt SignBit(APInt::getSignBit(BitWidth));
    741       // Adjust to where it is now in the mask.
    742       SignBit = APIntOps::lshr(SignBit, ShiftAmt);
    743 
    744       // If the input sign bit is known to be zero, or if none of the top bits
    745       // are demanded, turn this into an unsigned shift right.
    746       if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
    747           (HighBits & ~DemandedMask) == HighBits) {
    748         // Perform the logical shift right.
    749         BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0),
    750                                                             SA, I->getName());
    751         NewVal->setIsExact(cast<BinaryOperator>(I)->isExact());
    752         return InsertNewInstWith(NewVal, *I);
    753       } else if ((KnownOne & SignBit) != 0) { // New bits are known one.
    754         KnownOne |= HighBits;
    755       }
    756     }
    757     break;
    758   case Instruction::SRem:
    759     if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
    760       // X % -1 demands all the bits because we don't want to introduce
    761       // INT_MIN % -1 (== undef) by accident.
    762       if (Rem->isAllOnesValue())
    763         break;
    764       APInt RA = Rem->getValue().abs();
    765       if (RA.isPowerOf2()) {
    766         if (DemandedMask.ult(RA))    // srem won't affect demanded bits
    767           return I->getOperand(0);
    768 
    769         APInt LowBits = RA - 1;
    770         APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
    771         if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, LHSKnownZero,
    772                                  LHSKnownOne, Depth + 1))
    773           return I;
    774 
    775         // The low bits of LHS are unchanged by the srem.
    776         KnownZero = LHSKnownZero & LowBits;
    777         KnownOne = LHSKnownOne & LowBits;
    778 
    779         // If LHS is non-negative or has all low bits zero, then the upper bits
    780         // are all zero.
    781         if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
    782           KnownZero |= ~LowBits;
    783 
    784         // If LHS is negative and not all low bits are zero, then the upper bits
    785         // are all one.
    786         if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0))
    787           KnownOne |= ~LowBits;
    788 
    789         assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
    790       }
    791     }
    792 
    793     // The sign bit is the LHS's sign bit, except when the result of the
    794     // remainder is zero.
    795     if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
    796       APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
    797       computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
    798                        CxtI);
    799       // If it's known zero, our sign bit is also zero.
    800       if (LHSKnownZero.isNegative())
    801         KnownZero.setBit(KnownZero.getBitWidth() - 1);
    802     }
    803     break;
    804   case Instruction::URem: {
    805     APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
    806     APInt AllOnes = APInt::getAllOnesValue(BitWidth);
    807     if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, KnownZero2,
    808                              KnownOne2, Depth + 1) ||
    809         SimplifyDemandedBits(I->getOperandUse(1), AllOnes, KnownZero2,
    810                              KnownOne2, Depth + 1))
    811       return I;
    812 
    813     unsigned Leaders = KnownZero2.countLeadingOnes();
    814     Leaders = std::max(Leaders,
    815                        KnownZero2.countLeadingOnes());
    816     KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
    817     break;
    818   }
    819   case Instruction::Call:
    820     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    821       switch (II->getIntrinsicID()) {
    822       default: break;
    823       case Intrinsic::bswap: {
    824         // If the only bits demanded come from one byte of the bswap result,
    825         // just shift the input byte into position to eliminate the bswap.
    826         unsigned NLZ = DemandedMask.countLeadingZeros();
    827         unsigned NTZ = DemandedMask.countTrailingZeros();
    828 
    829         // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
    830         // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
    831         // have 14 leading zeros, round to 8.
    832         NLZ &= ~7;
    833         NTZ &= ~7;
    834         // If we need exactly one byte, we can do this transformation.
    835         if (BitWidth-NLZ-NTZ == 8) {
    836           unsigned ResultBit = NTZ;
    837           unsigned InputBit = BitWidth-NTZ-8;
    838 
    839           // Replace this with either a left or right shift to get the byte into
    840           // the right place.
    841           Instruction *NewVal;
    842           if (InputBit > ResultBit)
    843             NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
    844                     ConstantInt::get(I->getType(), InputBit-ResultBit));
    845           else
    846             NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
    847                     ConstantInt::get(I->getType(), ResultBit-InputBit));
    848           NewVal->takeName(I);
    849           return InsertNewInstWith(NewVal, *I);
    850         }
    851 
    852         // TODO: Could compute known zero/one bits based on the input.
    853         break;
    854       }
    855       case Intrinsic::x86_sse42_crc32_64_64:
    856         KnownZero = APInt::getHighBitsSet(64, 32);
    857         return nullptr;
    858       }
    859     }
    860     computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
    861     break;
    862   }
    863 
    864   // If the client is only demanding bits that we know, return the known
    865   // constant.
    866   if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
    867     return Constant::getIntegerValue(VTy, KnownOne);
    868   return nullptr;
    869 }
    870 
    871 /// Helper routine of SimplifyDemandedUseBits. It tries to simplify
    872 /// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
    873 /// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
    874 /// of "C2-C1".
    875 ///
    876 /// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
    877 /// ..., bn}, without considering the specific value X is holding.
    878 /// This transformation is legal iff one of following conditions is hold:
    879 ///  1) All the bit in S are 0, in this case E1 == E2.
    880 ///  2) We don't care those bits in S, per the input DemandedMask.
    881 ///  3) Combination of 1) and 2). Some bits in S are 0, and we don't care the
    882 ///     rest bits.
    883 ///
    884 /// Currently we only test condition 2).
    885 ///
    886 /// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
    887 /// not successful.
    888 Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
    889   Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) {
    890 
    891   const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue();
    892   const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue();
    893   if (!ShlOp1 || !ShrOp1)
    894       return nullptr; // Noop.
    895 
    896   Value *VarX = Shr->getOperand(0);
    897   Type *Ty = VarX->getType();
    898   unsigned BitWidth = Ty->getIntegerBitWidth();
    899   if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
    900     return nullptr; // Undef.
    901 
    902   unsigned ShlAmt = ShlOp1.getZExtValue();
    903   unsigned ShrAmt = ShrOp1.getZExtValue();
    904 
    905   KnownOne.clearAllBits();
    906   KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1);
    907   KnownZero &= DemandedMask;
    908 
    909   APInt BitMask1(APInt::getAllOnesValue(BitWidth));
    910   APInt BitMask2(APInt::getAllOnesValue(BitWidth));
    911 
    912   bool isLshr = (Shr->getOpcode() == Instruction::LShr);
    913   BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
    914                       (BitMask1.ashr(ShrAmt) << ShlAmt);
    915 
    916   if (ShrAmt <= ShlAmt) {
    917     BitMask2 <<= (ShlAmt - ShrAmt);
    918   } else {
    919     BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
    920                         BitMask2.ashr(ShrAmt - ShlAmt);
    921   }
    922 
    923   // Check if condition-2 (see the comment to this function) is satified.
    924   if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
    925     if (ShrAmt == ShlAmt)
    926       return VarX;
    927 
    928     if (!Shr->hasOneUse())
    929       return nullptr;
    930 
    931     BinaryOperator *New;
    932     if (ShrAmt < ShlAmt) {
    933       Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
    934       New = BinaryOperator::CreateShl(VarX, Amt);
    935       BinaryOperator *Orig = cast<BinaryOperator>(Shl);
    936       New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
    937       New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
    938     } else {
    939       Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
    940       New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
    941                      BinaryOperator::CreateAShr(VarX, Amt);
    942       if (cast<BinaryOperator>(Shr)->isExact())
    943         New->setIsExact(true);
    944     }
    945 
    946     return InsertNewInstWith(New, *Shl);
    947   }
    948 
    949   return nullptr;
    950 }
    951 
    952 /// SimplifyDemandedVectorElts - The specified value produces a vector with
    953 /// any number of elements. DemandedElts contains the set of elements that are
    954 /// actually used by the caller.  This method analyzes which elements of the
    955 /// operand are undef and returns that information in UndefElts.
    956 ///
    957 /// If the information about demanded elements can be used to simplify the
    958 /// operation, the operation is simplified, then the resultant value is
    959 /// returned.  This returns null if no change was made.
    960 Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
    961                                                 APInt &UndefElts,
    962                                                 unsigned Depth) {
    963   unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
    964   APInt EltMask(APInt::getAllOnesValue(VWidth));
    965   assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
    966 
    967   if (isa<UndefValue>(V)) {
    968     // If the entire vector is undefined, just return this info.
    969     UndefElts = EltMask;
    970     return nullptr;
    971   }
    972 
    973   if (DemandedElts == 0) { // If nothing is demanded, provide undef.
    974     UndefElts = EltMask;
    975     return UndefValue::get(V->getType());
    976   }
    977 
    978   UndefElts = 0;
    979 
    980   // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
    981   if (Constant *C = dyn_cast<Constant>(V)) {
    982     // Check if this is identity. If so, return 0 since we are not simplifying
    983     // anything.
    984     if (DemandedElts.isAllOnesValue())
    985       return nullptr;
    986 
    987     Type *EltTy = cast<VectorType>(V->getType())->getElementType();
    988     Constant *Undef = UndefValue::get(EltTy);
    989 
    990     SmallVector<Constant*, 16> Elts;
    991     for (unsigned i = 0; i != VWidth; ++i) {
    992       if (!DemandedElts[i]) {   // If not demanded, set to undef.
    993         Elts.push_back(Undef);
    994         UndefElts.setBit(i);
    995         continue;
    996       }
    997 
    998       Constant *Elt = C->getAggregateElement(i);
    999       if (!Elt) return nullptr;
   1000 
   1001       if (isa<UndefValue>(Elt)) {   // Already undef.
   1002         Elts.push_back(Undef);
   1003         UndefElts.setBit(i);
   1004       } else {                               // Otherwise, defined.
   1005         Elts.push_back(Elt);
   1006       }
   1007     }
   1008 
   1009     // If we changed the constant, return it.
   1010     Constant *NewCV = ConstantVector::get(Elts);
   1011     return NewCV != C ? NewCV : nullptr;
   1012   }
   1013 
   1014   // Limit search depth.
   1015   if (Depth == 10)
   1016     return nullptr;
   1017 
   1018   // If multiple users are using the root value, proceed with
   1019   // simplification conservatively assuming that all elements
   1020   // are needed.
   1021   if (!V->hasOneUse()) {
   1022     // Quit if we find multiple users of a non-root value though.
   1023     // They'll be handled when it's their turn to be visited by
   1024     // the main instcombine process.
   1025     if (Depth != 0)
   1026       // TODO: Just compute the UndefElts information recursively.
   1027       return nullptr;
   1028 
   1029     // Conservatively assume that all elements are needed.
   1030     DemandedElts = EltMask;
   1031   }
   1032 
   1033   Instruction *I = dyn_cast<Instruction>(V);
   1034   if (!I) return nullptr;        // Only analyze instructions.
   1035 
   1036   bool MadeChange = false;
   1037   APInt UndefElts2(VWidth, 0);
   1038   Value *TmpV;
   1039   switch (I->getOpcode()) {
   1040   default: break;
   1041 
   1042   case Instruction::InsertElement: {
   1043     // If this is a variable index, we don't know which element it overwrites.
   1044     // demand exactly the same input as we produce.
   1045     ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
   1046     if (!Idx) {
   1047       // Note that we can't propagate undef elt info, because we don't know
   1048       // which elt is getting updated.
   1049       TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
   1050                                         UndefElts2, Depth + 1);
   1051       if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
   1052       break;
   1053     }
   1054 
   1055     // If this is inserting an element that isn't demanded, remove this
   1056     // insertelement.
   1057     unsigned IdxNo = Idx->getZExtValue();
   1058     if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
   1059       Worklist.Add(I);
   1060       return I->getOperand(0);
   1061     }
   1062 
   1063     // Otherwise, the element inserted overwrites whatever was there, so the
   1064     // input demanded set is simpler than the output set.
   1065     APInt DemandedElts2 = DemandedElts;
   1066     DemandedElts2.clearBit(IdxNo);
   1067     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
   1068                                       UndefElts, Depth + 1);
   1069     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
   1070 
   1071     // The inserted element is defined.
   1072     UndefElts.clearBit(IdxNo);
   1073     break;
   1074   }
   1075   case Instruction::ShuffleVector: {
   1076     ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
   1077     uint64_t LHSVWidth =
   1078       cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
   1079     APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
   1080     for (unsigned i = 0; i < VWidth; i++) {
   1081       if (DemandedElts[i]) {
   1082         unsigned MaskVal = Shuffle->getMaskValue(i);
   1083         if (MaskVal != -1u) {
   1084           assert(MaskVal < LHSVWidth * 2 &&
   1085                  "shufflevector mask index out of range!");
   1086           if (MaskVal < LHSVWidth)
   1087             LeftDemanded.setBit(MaskVal);
   1088           else
   1089             RightDemanded.setBit(MaskVal - LHSVWidth);
   1090         }
   1091       }
   1092     }
   1093 
   1094     APInt UndefElts4(LHSVWidth, 0);
   1095     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
   1096                                       UndefElts4, Depth + 1);
   1097     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
   1098 
   1099     APInt UndefElts3(LHSVWidth, 0);
   1100     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
   1101                                       UndefElts3, Depth + 1);
   1102     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
   1103 
   1104     bool NewUndefElts = false;
   1105     for (unsigned i = 0; i < VWidth; i++) {
   1106       unsigned MaskVal = Shuffle->getMaskValue(i);
   1107       if (MaskVal == -1u) {
   1108         UndefElts.setBit(i);
   1109       } else if (!DemandedElts[i]) {
   1110         NewUndefElts = true;
   1111         UndefElts.setBit(i);
   1112       } else if (MaskVal < LHSVWidth) {
   1113         if (UndefElts4[MaskVal]) {
   1114           NewUndefElts = true;
   1115           UndefElts.setBit(i);
   1116         }
   1117       } else {
   1118         if (UndefElts3[MaskVal - LHSVWidth]) {
   1119           NewUndefElts = true;
   1120           UndefElts.setBit(i);
   1121         }
   1122       }
   1123     }
   1124 
   1125     if (NewUndefElts) {
   1126       // Add additional discovered undefs.
   1127       SmallVector<Constant*, 16> Elts;
   1128       for (unsigned i = 0; i < VWidth; ++i) {
   1129         if (UndefElts[i])
   1130           Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
   1131         else
   1132           Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
   1133                                           Shuffle->getMaskValue(i)));
   1134       }
   1135       I->setOperand(2, ConstantVector::get(Elts));
   1136       MadeChange = true;
   1137     }
   1138     break;
   1139   }
   1140   case Instruction::Select: {
   1141     APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
   1142     if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
   1143       for (unsigned i = 0; i < VWidth; i++) {
   1144         if (CV->getAggregateElement(i)->isNullValue())
   1145           LeftDemanded.clearBit(i);
   1146         else
   1147           RightDemanded.clearBit(i);
   1148       }
   1149     }
   1150 
   1151     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, UndefElts,
   1152                                       Depth + 1);
   1153     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
   1154 
   1155     TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
   1156                                       UndefElts2, Depth + 1);
   1157     if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
   1158 
   1159     // Output elements are undefined if both are undefined.
   1160     UndefElts &= UndefElts2;
   1161     break;
   1162   }
   1163   case Instruction::BitCast: {
   1164     // Vector->vector casts only.
   1165     VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
   1166     if (!VTy) break;
   1167     unsigned InVWidth = VTy->getNumElements();
   1168     APInt InputDemandedElts(InVWidth, 0);
   1169     unsigned Ratio;
   1170 
   1171     if (VWidth == InVWidth) {
   1172       // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
   1173       // elements as are demanded of us.
   1174       Ratio = 1;
   1175       InputDemandedElts = DemandedElts;
   1176     } else if (VWidth > InVWidth) {
   1177       // Untested so far.
   1178       break;
   1179 
   1180       // If there are more elements in the result than there are in the source,
   1181       // then an input element is live if any of the corresponding output
   1182       // elements are live.
   1183       Ratio = VWidth/InVWidth;
   1184       for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
   1185         if (DemandedElts[OutIdx])
   1186           InputDemandedElts.setBit(OutIdx/Ratio);
   1187       }
   1188     } else {
   1189       // Untested so far.
   1190       break;
   1191 
   1192       // If there are more elements in the source than there are in the result,
   1193       // then an input element is live if the corresponding output element is
   1194       // live.
   1195       Ratio = InVWidth/VWidth;
   1196       for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
   1197         if (DemandedElts[InIdx/Ratio])
   1198           InputDemandedElts.setBit(InIdx);
   1199     }
   1200 
   1201     // div/rem demand all inputs, because they don't want divide by zero.
   1202     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
   1203                                       UndefElts2, Depth + 1);
   1204     if (TmpV) {
   1205       I->setOperand(0, TmpV);
   1206       MadeChange = true;
   1207     }
   1208 
   1209     UndefElts = UndefElts2;
   1210     if (VWidth > InVWidth) {
   1211       llvm_unreachable("Unimp");
   1212       // If there are more elements in the result than there are in the source,
   1213       // then an output element is undef if the corresponding input element is
   1214       // undef.
   1215       for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
   1216         if (UndefElts2[OutIdx/Ratio])
   1217           UndefElts.setBit(OutIdx);
   1218     } else if (VWidth < InVWidth) {
   1219       llvm_unreachable("Unimp");
   1220       // If there are more elements in the source than there are in the result,
   1221       // then a result element is undef if all of the corresponding input
   1222       // elements are undef.
   1223       UndefElts = ~0ULL >> (64-VWidth);  // Start out all undef.
   1224       for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
   1225         if (!UndefElts2[InIdx])            // Not undef?
   1226           UndefElts.clearBit(InIdx/Ratio);    // Clear undef bit.
   1227     }
   1228     break;
   1229   }
   1230   case Instruction::And:
   1231   case Instruction::Or:
   1232   case Instruction::Xor:
   1233   case Instruction::Add:
   1234   case Instruction::Sub:
   1235   case Instruction::Mul:
   1236     // div/rem demand all inputs, because they don't want divide by zero.
   1237     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
   1238                                       Depth + 1);
   1239     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
   1240     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
   1241                                       UndefElts2, Depth + 1);
   1242     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
   1243 
   1244     // Output elements are undefined if both are undefined.  Consider things
   1245     // like undef&0.  The result is known zero, not undef.
   1246     UndefElts &= UndefElts2;
   1247     break;
   1248   case Instruction::FPTrunc:
   1249   case Instruction::FPExt:
   1250     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
   1251                                       Depth + 1);
   1252     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
   1253     break;
   1254 
   1255   case Instruction::Call: {
   1256     IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
   1257     if (!II) break;
   1258     switch (II->getIntrinsicID()) {
   1259     default: break;
   1260 
   1261     // Binary vector operations that work column-wise.  A dest element is a
   1262     // function of the corresponding input elements from the two inputs.
   1263     case Intrinsic::x86_sse_sub_ss:
   1264     case Intrinsic::x86_sse_mul_ss:
   1265     case Intrinsic::x86_sse_min_ss:
   1266     case Intrinsic::x86_sse_max_ss:
   1267     case Intrinsic::x86_sse2_sub_sd:
   1268     case Intrinsic::x86_sse2_mul_sd:
   1269     case Intrinsic::x86_sse2_min_sd:
   1270     case Intrinsic::x86_sse2_max_sd:
   1271       TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
   1272                                         UndefElts, Depth + 1);
   1273       if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
   1274       TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
   1275                                         UndefElts2, Depth + 1);
   1276       if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
   1277 
   1278       // If only the low elt is demanded and this is a scalarizable intrinsic,
   1279       // scalarize it now.
   1280       if (DemandedElts == 1) {
   1281         switch (II->getIntrinsicID()) {
   1282         default: break;
   1283         case Intrinsic::x86_sse_sub_ss:
   1284         case Intrinsic::x86_sse_mul_ss:
   1285         case Intrinsic::x86_sse2_sub_sd:
   1286         case Intrinsic::x86_sse2_mul_sd:
   1287           // TODO: Lower MIN/MAX/ABS/etc
   1288           Value *LHS = II->getArgOperand(0);
   1289           Value *RHS = II->getArgOperand(1);
   1290           // Extract the element as scalars.
   1291           LHS = InsertNewInstWith(ExtractElementInst::Create(LHS,
   1292             ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
   1293           RHS = InsertNewInstWith(ExtractElementInst::Create(RHS,
   1294             ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
   1295 
   1296           switch (II->getIntrinsicID()) {
   1297           default: llvm_unreachable("Case stmts out of sync!");
   1298           case Intrinsic::x86_sse_sub_ss:
   1299           case Intrinsic::x86_sse2_sub_sd:
   1300             TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS,
   1301                                                         II->getName()), *II);
   1302             break;
   1303           case Intrinsic::x86_sse_mul_ss:
   1304           case Intrinsic::x86_sse2_mul_sd:
   1305             TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS,
   1306                                                          II->getName()), *II);
   1307             break;
   1308           }
   1309 
   1310           Instruction *New =
   1311             InsertElementInst::Create(
   1312               UndefValue::get(II->getType()), TmpV,
   1313               ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
   1314                                       II->getName());
   1315           InsertNewInstWith(New, *II);
   1316           return New;
   1317         }
   1318       }
   1319 
   1320       // Output elements are undefined if both are undefined.  Consider things
   1321       // like undef&0.  The result is known zero, not undef.
   1322       UndefElts &= UndefElts2;
   1323       break;
   1324     }
   1325     break;
   1326   }
   1327   }
   1328   return MadeChange ? I : nullptr;
   1329 }
   1330