Home | History | Annotate | Download | only in InstCombine
      1 //===- InstCombineCompares.cpp --------------------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements the visitICmp and visitFCmp functions.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "InstCombineInternal.h"
     15 #include "llvm/ADT/APSInt.h"
     16 #include "llvm/ADT/SetVector.h"
     17 #include "llvm/ADT/Statistic.h"
     18 #include "llvm/Analysis/ConstantFolding.h"
     19 #include "llvm/Analysis/InstructionSimplify.h"
     20 #include "llvm/Analysis/TargetLibraryInfo.h"
     21 #include "llvm/IR/ConstantRange.h"
     22 #include "llvm/IR/DataLayout.h"
     23 #include "llvm/IR/GetElementPtrTypeIterator.h"
     24 #include "llvm/IR/IntrinsicInst.h"
     25 #include "llvm/IR/PatternMatch.h"
     26 #include "llvm/Support/Debug.h"
     27 #include "llvm/Support/KnownBits.h"
     28 
     29 using namespace llvm;
     30 using namespace PatternMatch;
     31 
     32 #define DEBUG_TYPE "instcombine"
     33 
     34 // How many times is a select replaced by one of its operands?
     35 STATISTIC(NumSel, "Number of select opts");
     36 
     37 
     38 /// Compute Result = In1+In2, returning true if the result overflowed for this
     39 /// type.
     40 static bool addWithOverflow(APInt &Result, const APInt &In1,
     41                             const APInt &In2, bool IsSigned = false) {
     42   bool Overflow;
     43   if (IsSigned)
     44     Result = In1.sadd_ov(In2, Overflow);
     45   else
     46     Result = In1.uadd_ov(In2, Overflow);
     47 
     48   return Overflow;
     49 }
     50 
     51 /// Compute Result = In1-In2, returning true if the result overflowed for this
     52 /// type.
     53 static bool subWithOverflow(APInt &Result, const APInt &In1,
     54                             const APInt &In2, bool IsSigned = false) {
     55   bool Overflow;
     56   if (IsSigned)
     57     Result = In1.ssub_ov(In2, Overflow);
     58   else
     59     Result = In1.usub_ov(In2, Overflow);
     60 
     61   return Overflow;
     62 }
     63 
     64 /// Given an icmp instruction, return true if any use of this comparison is a
     65 /// branch on sign bit comparison.
     66 static bool hasBranchUse(ICmpInst &I) {
     67   for (auto *U : I.users())
     68     if (isa<BranchInst>(U))
     69       return true;
     70   return false;
     71 }
     72 
     73 /// Given an exploded icmp instruction, return true if the comparison only
     74 /// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if the
     75 /// result of the comparison is true when the input value is signed.
     76 static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
     77                            bool &TrueIfSigned) {
     78   switch (Pred) {
     79   case ICmpInst::ICMP_SLT:   // True if LHS s< 0
     80     TrueIfSigned = true;
     81     return RHS.isNullValue();
     82   case ICmpInst::ICMP_SLE:   // True if LHS s<= RHS and RHS == -1
     83     TrueIfSigned = true;
     84     return RHS.isAllOnesValue();
     85   case ICmpInst::ICMP_SGT:   // True if LHS s> -1
     86     TrueIfSigned = false;
     87     return RHS.isAllOnesValue();
     88   case ICmpInst::ICMP_UGT:
     89     // True if LHS u> RHS and RHS == high-bit-mask - 1
     90     TrueIfSigned = true;
     91     return RHS.isMaxSignedValue();
     92   case ICmpInst::ICMP_UGE:
     93     // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
     94     TrueIfSigned = true;
     95     return RHS.isSignMask();
     96   default:
     97     return false;
     98   }
     99 }
    100 
    101 /// Returns true if the exploded icmp can be expressed as a signed comparison
    102 /// to zero and updates the predicate accordingly.
    103 /// The signedness of the comparison is preserved.
    104 /// TODO: Refactor with decomposeBitTestICmp()?
    105 static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
    106   if (!ICmpInst::isSigned(Pred))
    107     return false;
    108 
    109   if (C.isNullValue())
    110     return ICmpInst::isRelational(Pred);
    111 
    112   if (C.isOneValue()) {
    113     if (Pred == ICmpInst::ICMP_SLT) {
    114       Pred = ICmpInst::ICMP_SLE;
    115       return true;
    116     }
    117   } else if (C.isAllOnesValue()) {
    118     if (Pred == ICmpInst::ICMP_SGT) {
    119       Pred = ICmpInst::ICMP_SGE;
    120       return true;
    121     }
    122   }
    123 
    124   return false;
    125 }
    126 
    127 /// Given a signed integer type and a set of known zero and one bits, compute
    128 /// the maximum and minimum values that could have the specified known zero and
    129 /// known one bits, returning them in Min/Max.
    130 /// TODO: Move to method on KnownBits struct?
    131 static void computeSignedMinMaxValuesFromKnownBits(const KnownBits &Known,
    132                                                    APInt &Min, APInt &Max) {
    133   assert(Known.getBitWidth() == Min.getBitWidth() &&
    134          Known.getBitWidth() == Max.getBitWidth() &&
    135          "KnownZero, KnownOne and Min, Max must have equal bitwidth.");
    136   APInt UnknownBits = ~(Known.Zero|Known.One);
    137 
    138   // The minimum value is when all unknown bits are zeros, EXCEPT for the sign
    139   // bit if it is unknown.
    140   Min = Known.One;
    141   Max = Known.One|UnknownBits;
    142 
    143   if (UnknownBits.isNegative()) { // Sign bit is unknown
    144     Min.setSignBit();
    145     Max.clearSignBit();
    146   }
    147 }
    148 
    149 /// Given an unsigned integer type and a set of known zero and one bits, compute
    150 /// the maximum and minimum values that could have the specified known zero and
    151 /// known one bits, returning them in Min/Max.
    152 /// TODO: Move to method on KnownBits struct?
    153 static void computeUnsignedMinMaxValuesFromKnownBits(const KnownBits &Known,
    154                                                      APInt &Min, APInt &Max) {
    155   assert(Known.getBitWidth() == Min.getBitWidth() &&
    156          Known.getBitWidth() == Max.getBitWidth() &&
    157          "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
    158   APInt UnknownBits = ~(Known.Zero|Known.One);
    159 
    160   // The minimum value is when the unknown bits are all zeros.
    161   Min = Known.One;
    162   // The maximum value is when the unknown bits are all ones.
    163   Max = Known.One|UnknownBits;
    164 }
    165 
    166 /// This is called when we see this pattern:
    167 ///   cmp pred (load (gep GV, ...)), cmpcst
    168 /// where GV is a global variable with a constant initializer. Try to simplify
    169 /// this into some simple computation that does not need the load. For example
    170 /// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3".
    171 ///
    172 /// If AndCst is non-null, then the loaded value is masked with that constant
    173 /// before doing the comparison. This handles cases like "A[i]&4 == 0".
    174 Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
    175                                                         GlobalVariable *GV,
    176                                                         CmpInst &ICI,
    177                                                         ConstantInt *AndCst) {
    178   Constant *Init = GV->getInitializer();
    179   if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
    180     return nullptr;
    181 
    182   uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
    183   // Don't blow up on huge arrays.
    184   if (ArrayElementCount > MaxArraySizeForCombine)
    185     return nullptr;
    186 
    187   // There are many forms of this optimization we can handle, for now, just do
    188   // the simple index into a single-dimensional array.
    189   //
    190   // Require: GEP GV, 0, i {{, constant indices}}
    191   if (GEP->getNumOperands() < 3 ||
    192       !isa<ConstantInt>(GEP->getOperand(1)) ||
    193       !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
    194       isa<Constant>(GEP->getOperand(2)))
    195     return nullptr;
    196 
    197   // Check that indices after the variable are constants and in-range for the
    198   // type they index.  Collect the indices.  This is typically for arrays of
    199   // structs.
    200   SmallVector<unsigned, 4> LaterIndices;
    201 
    202   Type *EltTy = Init->getType()->getArrayElementType();
    203   for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
    204     ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
    205     if (!Idx) return nullptr;  // Variable index.
    206 
    207     uint64_t IdxVal = Idx->getZExtValue();
    208     if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index.
    209 
    210     if (StructType *STy = dyn_cast<StructType>(EltTy))
    211       EltTy = STy->getElementType(IdxVal);
    212     else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
    213       if (IdxVal >= ATy->getNumElements()) return nullptr;
    214       EltTy = ATy->getElementType();
    215     } else {
    216       return nullptr; // Unknown type.
    217     }
    218 
    219     LaterIndices.push_back(IdxVal);
    220   }
    221 
    222   enum { Overdefined = -3, Undefined = -2 };
    223 
    224   // Variables for our state machines.
    225 
    226   // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form
    227   // "i == 47 | i == 87", where 47 is the first index the condition is true for,
    228   // and 87 is the second (and last) index.  FirstTrueElement is -2 when
    229   // undefined, otherwise set to the first true element.  SecondTrueElement is
    230   // -2 when undefined, -3 when overdefined and >= 0 when that index is true.
    231   int FirstTrueElement = Undefined, SecondTrueElement = Undefined;
    232 
    233   // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the
    234   // form "i != 47 & i != 87".  Same state transitions as for true elements.
    235   int FirstFalseElement = Undefined, SecondFalseElement = Undefined;
    236 
    237   /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
    238   /// define a state machine that triggers for ranges of values that the index
    239   /// is true or false for.  This triggers on things like "abbbbc"[i] == 'b'.
    240   /// This is -2 when undefined, -3 when overdefined, and otherwise the last
    241   /// index in the range (inclusive).  We use -2 for undefined here because we
    242   /// use relative comparisons and don't want 0-1 to match -1.
    243   int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;
    244 
    245   // MagicBitvector - This is a magic bitvector where we set a bit if the
    246   // comparison is true for element 'i'.  If there are 64 elements or less in
    247   // the array, this will fully represent all the comparison results.
    248   uint64_t MagicBitvector = 0;
    249 
    250   // Scan the array and see if one of our patterns matches.
    251   Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
    252   for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
    253     Constant *Elt = Init->getAggregateElement(i);
    254     if (!Elt) return nullptr;
    255 
    256     // If this is indexing an array of structures, get the structure element.
    257     if (!LaterIndices.empty())
    258       Elt = ConstantExpr::getExtractValue(Elt, LaterIndices);
    259 
    260     // If the element is masked, handle it.
    261     if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);
    262 
    263     // Find out if the comparison would be true or false for the i'th element.
    264     Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
    265                                                   CompareRHS, DL, &TLI);
    266     // If the result is undef for this element, ignore it.
    267     if (isa<UndefValue>(C)) {
    268       // Extend range state machines to cover this element in case there is an
    269       // undef in the middle of the range.
    270       if (TrueRangeEnd == (int)i-1)
    271         TrueRangeEnd = i;
    272       if (FalseRangeEnd == (int)i-1)
    273         FalseRangeEnd = i;
    274       continue;
    275     }
    276 
    277     // If we can't compute the result for any of the elements, we have to give
    278     // up evaluating the entire conditional.
    279     if (!isa<ConstantInt>(C)) return nullptr;
    280 
    281     // Otherwise, we know if the comparison is true or false for this element,
    282     // update our state machines.
    283     bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
    284 
    285     // State machine for single/double/range index comparison.
    286     if (IsTrueForElt) {
    287       // Update the TrueElement state machine.
    288       if (FirstTrueElement == Undefined)
    289         FirstTrueElement = TrueRangeEnd = i;  // First true element.
    290       else {
    291         // Update double-compare state machine.
    292         if (SecondTrueElement == Undefined)
    293           SecondTrueElement = i;
    294         else
    295           SecondTrueElement = Overdefined;
    296 
    297         // Update range state machine.
    298         if (TrueRangeEnd == (int)i-1)
    299           TrueRangeEnd = i;
    300         else
    301           TrueRangeEnd = Overdefined;
    302       }
    303     } else {
    304       // Update the FalseElement state machine.
    305       if (FirstFalseElement == Undefined)
    306         FirstFalseElement = FalseRangeEnd = i; // First false element.
    307       else {
    308         // Update double-compare state machine.
    309         if (SecondFalseElement == Undefined)
    310           SecondFalseElement = i;
    311         else
    312           SecondFalseElement = Overdefined;
    313 
    314         // Update range state machine.
    315         if (FalseRangeEnd == (int)i-1)
    316           FalseRangeEnd = i;
    317         else
    318           FalseRangeEnd = Overdefined;
    319       }
    320     }
    321 
    322     // If this element is in range, update our magic bitvector.
    323     if (i < 64 && IsTrueForElt)
    324       MagicBitvector |= 1ULL << i;
    325 
    326     // If all of our states become overdefined, bail out early.  Since the
    327     // predicate is expensive, only check it every 8 elements.  This is only
    328     // really useful for really huge arrays.
    329     if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
    330         SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
    331         FalseRangeEnd == Overdefined)
    332       return nullptr;
    333   }
    334 
    335   // Now that we've scanned the entire array, emit our new comparison(s).  We
    336   // order the state machines in complexity of the generated code.
    337   Value *Idx = GEP->getOperand(2);
    338 
    339   // If the index is larger than the pointer size of the target, truncate the
    340   // index down like the GEP would do implicitly.  We don't have to do this for
    341   // an inbounds GEP because the index can't be out of range.
    342   if (!GEP->isInBounds()) {
    343     Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
    344     unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
    345     if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize)
    346       Idx = Builder.CreateTrunc(Idx, IntPtrTy);
    347   }
    348 
    349   // If the comparison is only true for one or two elements, emit direct
    350   // comparisons.
    351   if (SecondTrueElement != Overdefined) {
    352     // None true -> false.
    353     if (FirstTrueElement == Undefined)
    354       return replaceInstUsesWith(ICI, Builder.getFalse());
    355 
    356     Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);
    357 
    358     // True for one element -> 'i == 47'.
    359     if (SecondTrueElement == Undefined)
    360       return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);
    361 
    362     // True for two elements -> 'i == 47 | i == 72'.
    363     Value *C1 = Builder.CreateICmpEQ(Idx, FirstTrueIdx);
    364     Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
    365     Value *C2 = Builder.CreateICmpEQ(Idx, SecondTrueIdx);
    366     return BinaryOperator::CreateOr(C1, C2);
    367   }
    368 
    369   // If the comparison is only false for one or two elements, emit direct
    370   // comparisons.
    371   if (SecondFalseElement != Overdefined) {
    372     // None false -> true.
    373     if (FirstFalseElement == Undefined)
    374       return replaceInstUsesWith(ICI, Builder.getTrue());
    375 
    376     Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);
    377 
    378     // False for one element -> 'i != 47'.
    379     if (SecondFalseElement == Undefined)
    380       return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);
    381 
    382     // False for two elements -> 'i != 47 & i != 72'.
    383     Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx);
    384     Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement);
    385     Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx);
    386     return BinaryOperator::CreateAnd(C1, C2);
    387   }
    388 
    389   // If the comparison can be replaced with a range comparison for the elements
    390   // where it is true, emit the range check.
    391   if (TrueRangeEnd != Overdefined) {
    392     assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
    393 
    394     // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
    395     if (FirstTrueElement) {
    396       Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
    397       Idx = Builder.CreateAdd(Idx, Offs);
    398     }
    399 
    400     Value *End = ConstantInt::get(Idx->getType(),
    401                                   TrueRangeEnd-FirstTrueElement+1);
    402     return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
    403   }
    404 
    405   // False range check.
    406   if (FalseRangeEnd != Overdefined) {
    407     assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
    408     // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
    409     if (FirstFalseElement) {
    410       Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
    411       Idx = Builder.CreateAdd(Idx, Offs);
    412     }
    413 
    414     Value *End = ConstantInt::get(Idx->getType(),
    415                                   FalseRangeEnd-FirstFalseElement);
    416     return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
    417   }
    418 
    419   // If a magic bitvector captures the entire comparison state
    420   // of this load, replace it with computation that does:
    421   //   ((magic_cst >> i) & 1) != 0
    422   {
    423     Type *Ty = nullptr;
    424 
    425     // Look for an appropriate type:
    426     // - The type of Idx if the magic fits
    427     // - The smallest fitting legal type
    428     if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
    429       Ty = Idx->getType();
    430     else
    431       Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
    432 
    433     if (Ty) {
    434       Value *V = Builder.CreateIntCast(Idx, Ty, false);
    435       V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
    436       V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V);
    437       return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
    438     }
    439   }
    440 
    441   return nullptr;
    442 }
    443 
    444 /// Return a value that can be used to compare the *offset* implied by a GEP to
    445 /// zero. For example, if we have &A[i], we want to return 'i' for
    446 /// "icmp ne i, 0". Note that, in general, indices can be complex, and scales
    447 /// are involved. The above expression would also be legal to codegen as
    448 /// "icmp ne (i*4), 0" (assuming A is a pointer to i32).
    449 /// This latter form is less amenable to optimization though, and we are allowed
    450 /// to generate the first by knowing that pointer arithmetic doesn't overflow.
    451 ///
    452 /// If we can't emit an optimized form for this expression, this returns null.
    453 ///
    454 static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
    455                                           const DataLayout &DL) {
    456   gep_type_iterator GTI = gep_type_begin(GEP);
    457 
    458   // Check to see if this gep only has a single variable index.  If so, and if
    459   // any constant indices are a multiple of its scale, then we can compute this
    460   // in terms of the scale of the variable index.  For example, if the GEP
    461   // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
    462   // because the expression will cross zero at the same point.
    463   unsigned i, e = GEP->getNumOperands();
    464   int64_t Offset = 0;
    465   for (i = 1; i != e; ++i, ++GTI) {
    466     if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
    467       // Compute the aggregate offset of constant indices.
    468       if (CI->isZero()) continue;
    469 
    470       // Handle a struct index, which adds its field offset to the pointer.
    471       if (StructType *STy = GTI.getStructTypeOrNull()) {
    472         Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
    473       } else {
    474         uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
    475         Offset += Size*CI->getSExtValue();
    476       }
    477     } else {
    478       // Found our variable index.
    479       break;
    480     }
    481   }
    482 
    483   // If there are no variable indices, we must have a constant offset, just
    484   // evaluate it the general way.
    485   if (i == e) return nullptr;
    486 
    487   Value *VariableIdx = GEP->getOperand(i);
    488   // Determine the scale factor of the variable element.  For example, this is
    489   // 4 if the variable index is into an array of i32.
    490   uint64_t VariableScale = DL.getTypeAllocSize(GTI.getIndexedType());
    491 
    492   // Verify that there are no other variable indices.  If so, emit the hard way.
    493   for (++i, ++GTI; i != e; ++i, ++GTI) {
    494     ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
    495     if (!CI) return nullptr;
    496 
    497     // Compute the aggregate offset of constant indices.
    498     if (CI->isZero()) continue;
    499 
    500     // Handle a struct index, which adds its field offset to the pointer.
    501     if (StructType *STy = GTI.getStructTypeOrNull()) {
    502       Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
    503     } else {
    504       uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
    505       Offset += Size*CI->getSExtValue();
    506     }
    507   }
    508 
    509   // Okay, we know we have a single variable index, which must be a
    510   // pointer/array/vector index.  If there is no offset, life is simple, return
    511   // the index.
    512   Type *IntPtrTy = DL.getIntPtrType(GEP->getOperand(0)->getType());
    513   unsigned IntPtrWidth = IntPtrTy->getIntegerBitWidth();
    514   if (Offset == 0) {
    515     // Cast to intptrty in case a truncation occurs.  If an extension is needed,
    516     // we don't need to bother extending: the extension won't affect where the
    517     // computation crosses zero.
    518     if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) {
    519       VariableIdx = IC.Builder.CreateTrunc(VariableIdx, IntPtrTy);
    520     }
    521     return VariableIdx;
    522   }
    523 
    524   // Otherwise, there is an index.  The computation we will do will be modulo
    525   // the pointer size, so get it.
    526   uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
    527 
    528   Offset &= PtrSizeMask;
    529   VariableScale &= PtrSizeMask;
    530 
    531   // To do this transformation, any constant index must be a multiple of the
    532   // variable scale factor.  For example, we can evaluate "12 + 4*i" as "3 + i",
    533   // but we can't evaluate "10 + 3*i" in terms of i.  Check that the offset is a
    534   // multiple of the variable scale.
    535   int64_t NewOffs = Offset / (int64_t)VariableScale;
    536   if (Offset != NewOffs*(int64_t)VariableScale)
    537     return nullptr;
    538 
    539   // Okay, we can do this evaluation.  Start by converting the index to intptr.
    540   if (VariableIdx->getType() != IntPtrTy)
    541     VariableIdx = IC.Builder.CreateIntCast(VariableIdx, IntPtrTy,
    542                                             true /*Signed*/);
    543   Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
    544   return IC.Builder.CreateAdd(VariableIdx, OffsetVal, "offset");
    545 }
    546 
    547 /// Returns true if we can rewrite Start as a GEP with pointer Base
    548 /// and some integer offset. The nodes that need to be re-written
    549 /// for this transformation will be added to Explored.
    550 static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
    551                                   const DataLayout &DL,
    552                                   SetVector<Value *> &Explored) {
    553   SmallVector<Value *, 16> WorkList(1, Start);
    554   Explored.insert(Base);
    555 
    556   // The following traversal gives us an order which can be used
    557   // when doing the final transformation. Since in the final
    558   // transformation we create the PHI replacement instructions first,
    559   // we don't have to get them in any particular order.
    560   //
    561   // However, for other instructions we will have to traverse the
    562   // operands of an instruction first, which means that we have to
    563   // do a post-order traversal.
    564   while (!WorkList.empty()) {
    565     SetVector<PHINode *> PHIs;
    566 
    567     while (!WorkList.empty()) {
    568       if (Explored.size() >= 100)
    569         return false;
    570 
    571       Value *V = WorkList.back();
    572 
    573       if (Explored.count(V) != 0) {
    574         WorkList.pop_back();
    575         continue;
    576       }
    577 
    578       if (!isa<IntToPtrInst>(V) && !isa<PtrToIntInst>(V) &&
    579           !isa<GetElementPtrInst>(V) && !isa<PHINode>(V))
    580         // We've found some value that we can't explore which is different from
    581         // the base. Therefore we can't do this transformation.
    582         return false;
    583 
    584       if (isa<IntToPtrInst>(V) || isa<PtrToIntInst>(V)) {
    585         auto *CI = dyn_cast<CastInst>(V);
    586         if (!CI->isNoopCast(DL))
    587           return false;
    588 
    589         if (Explored.count(CI->getOperand(0)) == 0)
    590           WorkList.push_back(CI->getOperand(0));
    591       }
    592 
    593       if (auto *GEP = dyn_cast<GEPOperator>(V)) {
    594         // We're limiting the GEP to having one index. This will preserve
    595         // the original pointer type. We could handle more cases in the
    596         // future.
    597         if (GEP->getNumIndices() != 1 || !GEP->isInBounds() ||
    598             GEP->getType() != Start->getType())
    599           return false;
    600 
    601         if (Explored.count(GEP->getOperand(0)) == 0)
    602           WorkList.push_back(GEP->getOperand(0));
    603       }
    604 
    605       if (WorkList.back() == V) {
    606         WorkList.pop_back();
    607         // We've finished visiting this node, mark it as such.
    608         Explored.insert(V);
    609       }
    610 
    611       if (auto *PN = dyn_cast<PHINode>(V)) {
    612         // We cannot transform PHIs on unsplittable basic blocks.
    613         if (isa<CatchSwitchInst>(PN->getParent()->getTerminator()))
    614           return false;
    615         Explored.insert(PN);
    616         PHIs.insert(PN);
    617       }
    618     }
    619 
    620     // Explore the PHI nodes further.
    621     for (auto *PN : PHIs)
    622       for (Value *Op : PN->incoming_values())
    623         if (Explored.count(Op) == 0)
    624           WorkList.push_back(Op);
    625   }
    626 
    627   // Make sure that we can do this. Since we can't insert GEPs in a basic
    628   // block before a PHI node, we can't easily do this transformation if
    629   // we have PHI node users of transformed instructions.
    630   for (Value *Val : Explored) {
    631     for (Value *Use : Val->uses()) {
    632 
    633       auto *PHI = dyn_cast<PHINode>(Use);
    634       auto *Inst = dyn_cast<Instruction>(Val);
    635 
    636       if (Inst == Base || Inst == PHI || !Inst || !PHI ||
    637           Explored.count(PHI) == 0)
    638         continue;
    639 
    640       if (PHI->getParent() == Inst->getParent())
    641         return false;
    642     }
    643   }
    644   return true;
    645 }
    646 
    647 // Sets the appropriate insert point on Builder where we can add
    648 // a replacement Instruction for V (if that is possible).
    649 static void setInsertionPoint(IRBuilder<> &Builder, Value *V,
    650                               bool Before = true) {
    651   if (auto *PHI = dyn_cast<PHINode>(V)) {
    652     Builder.SetInsertPoint(&*PHI->getParent()->getFirstInsertionPt());
    653     return;
    654   }
    655   if (auto *I = dyn_cast<Instruction>(V)) {
    656     if (!Before)
    657       I = &*std::next(I->getIterator());
    658     Builder.SetInsertPoint(I);
    659     return;
    660   }
    661   if (auto *A = dyn_cast<Argument>(V)) {
    662     // Set the insertion point in the entry block.
    663     BasicBlock &Entry = A->getParent()->getEntryBlock();
    664     Builder.SetInsertPoint(&*Entry.getFirstInsertionPt());
    665     return;
    666   }
    667   // Otherwise, this is a constant and we don't need to set a new
    668   // insertion point.
    669   assert(isa<Constant>(V) && "Setting insertion point for unknown value!");
    670 }
    671 
    672 /// Returns a re-written value of Start as an indexed GEP using Base as a
    673 /// pointer.
    674 static Value *rewriteGEPAsOffset(Value *Start, Value *Base,
    675                                  const DataLayout &DL,
    676                                  SetVector<Value *> &Explored) {
    677   // Perform all the substitutions. This is a bit tricky because we can
    678   // have cycles in our use-def chains.
    679   // 1. Create the PHI nodes without any incoming values.
    680   // 2. Create all the other values.
    681   // 3. Add the edges for the PHI nodes.
    682   // 4. Emit GEPs to get the original pointers.
    683   // 5. Remove the original instructions.
    684   Type *IndexType = IntegerType::get(
    685       Base->getContext(), DL.getIndexTypeSizeInBits(Start->getType()));
    686 
    687   DenseMap<Value *, Value *> NewInsts;
    688   NewInsts[Base] = ConstantInt::getNullValue(IndexType);
    689 
    690   // Create the new PHI nodes, without adding any incoming values.
    691   for (Value *Val : Explored) {
    692     if (Val == Base)
    693       continue;
    694     // Create empty phi nodes. This avoids cyclic dependencies when creating
    695     // the remaining instructions.
    696     if (auto *PHI = dyn_cast<PHINode>(Val))
    697       NewInsts[PHI] = PHINode::Create(IndexType, PHI->getNumIncomingValues(),
    698                                       PHI->getName() + ".idx", PHI);
    699   }
    700   IRBuilder<> Builder(Base->getContext());
    701 
    702   // Create all the other instructions.
    703   for (Value *Val : Explored) {
    704 
    705     if (NewInsts.find(Val) != NewInsts.end())
    706       continue;
    707 
    708     if (auto *CI = dyn_cast<CastInst>(Val)) {
    709       NewInsts[CI] = NewInsts[CI->getOperand(0)];
    710       continue;
    711     }
    712     if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
    713       Value *Index = NewInsts[GEP->getOperand(1)] ? NewInsts[GEP->getOperand(1)]
    714                                                   : GEP->getOperand(1);
    715       setInsertionPoint(Builder, GEP);
    716       // Indices might need to be sign extended. GEPs will magically do
    717       // this, but we need to do it ourselves here.
    718       if (Index->getType()->getScalarSizeInBits() !=
    719           NewInsts[GEP->getOperand(0)]->getType()->getScalarSizeInBits()) {
    720         Index = Builder.CreateSExtOrTrunc(
    721             Index, NewInsts[GEP->getOperand(0)]->getType(),
    722             GEP->getOperand(0)->getName() + ".sext");
    723       }
    724 
    725       auto *Op = NewInsts[GEP->getOperand(0)];
    726       if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
    727         NewInsts[GEP] = Index;
    728       else
    729         NewInsts[GEP] = Builder.CreateNSWAdd(
    730             Op, Index, GEP->getOperand(0)->getName() + ".add");
    731       continue;
    732     }
    733     if (isa<PHINode>(Val))
    734       continue;
    735 
    736     llvm_unreachable("Unexpected instruction type");
    737   }
    738 
    739   // Add the incoming values to the PHI nodes.
    740   for (Value *Val : Explored) {
    741     if (Val == Base)
    742       continue;
    743     // All the instructions have been created, we can now add edges to the
    744     // phi nodes.
    745     if (auto *PHI = dyn_cast<PHINode>(Val)) {
    746       PHINode *NewPhi = static_cast<PHINode *>(NewInsts[PHI]);
    747       for (unsigned I = 0, E = PHI->getNumIncomingValues(); I < E; ++I) {
    748         Value *NewIncoming = PHI->getIncomingValue(I);
    749 
    750         if (NewInsts.find(NewIncoming) != NewInsts.end())
    751           NewIncoming = NewInsts[NewIncoming];
    752 
    753         NewPhi->addIncoming(NewIncoming, PHI->getIncomingBlock(I));
    754       }
    755     }
    756   }
    757 
    758   for (Value *Val : Explored) {
    759     if (Val == Base)
    760       continue;
    761 
    762     // Depending on the type, for external users we have to emit
    763     // a GEP or a GEP + ptrtoint.
    764     setInsertionPoint(Builder, Val, false);
    765 
    766     // If required, create an inttoptr instruction for Base.
    767     Value *NewBase = Base;
    768     if (!Base->getType()->isPointerTy())
    769       NewBase = Builder.CreateBitOrPointerCast(Base, Start->getType(),
    770                                                Start->getName() + "to.ptr");
    771 
    772     Value *GEP = Builder.CreateInBoundsGEP(
    773         Start->getType()->getPointerElementType(), NewBase,
    774         makeArrayRef(NewInsts[Val]), Val->getName() + ".ptr");
    775 
    776     if (!Val->getType()->isPointerTy()) {
    777       Value *Cast = Builder.CreatePointerCast(GEP, Val->getType(),
    778                                               Val->getName() + ".conv");
    779       GEP = Cast;
    780     }
    781     Val->replaceAllUsesWith(GEP);
    782   }
    783 
    784   return NewInsts[Start];
    785 }
    786 
    787 /// Looks through GEPs, IntToPtrInsts and PtrToIntInsts in order to express
    788 /// the input Value as a constant indexed GEP. Returns a pair containing
    789 /// the GEPs Pointer and Index.
    790 static std::pair<Value *, Value *>
    791 getAsConstantIndexedAddress(Value *V, const DataLayout &DL) {
    792   Type *IndexType = IntegerType::get(V->getContext(),
    793                                      DL.getIndexTypeSizeInBits(V->getType()));
    794 
    795   Constant *Index = ConstantInt::getNullValue(IndexType);
    796   while (true) {
    797     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
    798       // We accept only inbouds GEPs here to exclude the possibility of
    799       // overflow.
    800       if (!GEP->isInBounds())
    801         break;
    802       if (GEP->hasAllConstantIndices() && GEP->getNumIndices() == 1 &&
    803           GEP->getType() == V->getType()) {
    804         V = GEP->getOperand(0);
    805         Constant *GEPIndex = static_cast<Constant *>(GEP->getOperand(1));
    806         Index = ConstantExpr::getAdd(
    807             Index, ConstantExpr::getSExtOrBitCast(GEPIndex, IndexType));
    808         continue;
    809       }
    810       break;
    811     }
    812     if (auto *CI = dyn_cast<IntToPtrInst>(V)) {
    813       if (!CI->isNoopCast(DL))
    814         break;
    815       V = CI->getOperand(0);
    816       continue;
    817     }
    818     if (auto *CI = dyn_cast<PtrToIntInst>(V)) {
    819       if (!CI->isNoopCast(DL))
    820         break;
    821       V = CI->getOperand(0);
    822       continue;
    823     }
    824     break;
    825   }
    826   return {V, Index};
    827 }
    828 
    829 /// Converts (CMP GEPLHS, RHS) if this change would make RHS a constant.
    830 /// We can look through PHIs, GEPs and casts in order to determine a common base
    831 /// between GEPLHS and RHS.
    832 static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
    833                                               ICmpInst::Predicate Cond,
    834                                               const DataLayout &DL) {
    835   if (!GEPLHS->hasAllConstantIndices())
    836     return nullptr;
    837 
    838   // Make sure the pointers have the same type.
    839   if (GEPLHS->getType() != RHS->getType())
    840     return nullptr;
    841 
    842   Value *PtrBase, *Index;
    843   std::tie(PtrBase, Index) = getAsConstantIndexedAddress(GEPLHS, DL);
    844 
    845   // The set of nodes that will take part in this transformation.
    846   SetVector<Value *> Nodes;
    847 
    848   if (!canRewriteGEPAsOffset(RHS, PtrBase, DL, Nodes))
    849     return nullptr;
    850 
    851   // We know we can re-write this as
    852   //  ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)
    853   // Since we've only looked through inbouds GEPs we know that we
    854   // can't have overflow on either side. We can therefore re-write
    855   // this as:
    856   //   OFFSET1 cmp OFFSET2
    857   Value *NewRHS = rewriteGEPAsOffset(RHS, PtrBase, DL, Nodes);
    858 
    859   // RewriteGEPAsOffset has replaced RHS and all of its uses with a re-written
    860   // GEP having PtrBase as the pointer base, and has returned in NewRHS the
    861   // offset. Since Index is the offset of LHS to the base pointer, we will now
    862   // compare the offsets instead of comparing the pointers.
    863   return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Index, NewRHS);
    864 }
    865 
    866 /// Fold comparisons between a GEP instruction and something else. At this point
    867 /// we know that the GEP is on the LHS of the comparison.
    868 Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
    869                                        ICmpInst::Predicate Cond,
    870                                        Instruction &I) {
    871   // Don't transform signed compares of GEPs into index compares. Even if the
    872   // GEP is inbounds, the final add of the base pointer can have signed overflow
    873   // and would change the result of the icmp.
    874   // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be
    875   // the maximum signed value for the pointer type.
    876   if (ICmpInst::isSigned(Cond))
    877     return nullptr;
    878 
    879   // Look through bitcasts and addrspacecasts. We do not however want to remove
    880   // 0 GEPs.
    881   if (!isa<GetElementPtrInst>(RHS))
    882     RHS = RHS->stripPointerCasts();
    883 
    884   Value *PtrBase = GEPLHS->getOperand(0);
    885   if (PtrBase == RHS && GEPLHS->isInBounds()) {
    886     // ((gep Ptr, OFFSET) cmp Ptr)   ---> (OFFSET cmp 0).
    887     // This transformation (ignoring the base and scales) is valid because we
    888     // know pointers can't overflow since the gep is inbounds.  See if we can
    889     // output an optimized form.
    890     Value *Offset = evaluateGEPOffsetExpression(GEPLHS, *this, DL);
    891 
    892     // If not, synthesize the offset the hard way.
    893     if (!Offset)
    894       Offset = EmitGEPOffset(GEPLHS);
    895     return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
    896                         Constant::getNullValue(Offset->getType()));
    897   } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {
    898     // If the base pointers are different, but the indices are the same, just
    899     // compare the base pointer.
    900     if (PtrBase != GEPRHS->getOperand(0)) {
    901       bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands();
    902       IndicesTheSame &= GEPLHS->getOperand(0)->getType() ==
    903                         GEPRHS->getOperand(0)->getType();
    904       if (IndicesTheSame)
    905         for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
    906           if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
    907             IndicesTheSame = false;
    908             break;
    909           }
    910 
    911       // If all indices are the same, just compare the base pointers.
    912       if (IndicesTheSame)
    913         return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0));
    914 
    915       // If we're comparing GEPs with two base pointers that only differ in type
    916       // and both GEPs have only constant indices or just one use, then fold
    917       // the compare with the adjusted indices.
    918       if (GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
    919           (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
    920           (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
    921           PtrBase->stripPointerCasts() ==
    922               GEPRHS->getOperand(0)->stripPointerCasts()) {
    923         Value *LOffset = EmitGEPOffset(GEPLHS);
    924         Value *ROffset = EmitGEPOffset(GEPRHS);
    925 
    926         // If we looked through an addrspacecast between different sized address
    927         // spaces, the LHS and RHS pointers are different sized
    928         // integers. Truncate to the smaller one.
    929         Type *LHSIndexTy = LOffset->getType();
    930         Type *RHSIndexTy = ROffset->getType();
    931         if (LHSIndexTy != RHSIndexTy) {
    932           if (LHSIndexTy->getPrimitiveSizeInBits() <
    933               RHSIndexTy->getPrimitiveSizeInBits()) {
    934             ROffset = Builder.CreateTrunc(ROffset, LHSIndexTy);
    935           } else
    936             LOffset = Builder.CreateTrunc(LOffset, RHSIndexTy);
    937         }
    938 
    939         Value *Cmp = Builder.CreateICmp(ICmpInst::getSignedPredicate(Cond),
    940                                         LOffset, ROffset);
    941         return replaceInstUsesWith(I, Cmp);
    942       }
    943 
    944       // Otherwise, the base pointers are different and the indices are
    945       // different. Try convert this to an indexed compare by looking through
    946       // PHIs/casts.
    947       return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
    948     }
    949 
    950     // If one of the GEPs has all zero indices, recurse.
    951     if (GEPLHS->hasAllZeroIndices())
    952       return foldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
    953                          ICmpInst::getSwappedPredicate(Cond), I);
    954 
    955     // If the other GEP has all zero indices, recurse.
    956     if (GEPRHS->hasAllZeroIndices())
    957       return foldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
    958 
    959     bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds();
    960     if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
    961       // If the GEPs only differ by one index, compare it.
    962       unsigned NumDifferences = 0;  // Keep track of # differences.
    963       unsigned DiffOperand = 0;     // The operand that differs.
    964       for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
    965         if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
    966           if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() !=
    967                    GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) {
    968             // Irreconcilable differences.
    969             NumDifferences = 2;
    970             break;
    971           } else {
    972             if (NumDifferences++) break;
    973             DiffOperand = i;
    974           }
    975         }
    976 
    977       if (NumDifferences == 0)   // SAME GEP?
    978         return replaceInstUsesWith(I, // No comparison is needed here.
    979                              Builder.getInt1(ICmpInst::isTrueWhenEqual(Cond)));
    980 
    981       else if (NumDifferences == 1 && GEPsInBounds) {
    982         Value *LHSV = GEPLHS->getOperand(DiffOperand);
    983         Value *RHSV = GEPRHS->getOperand(DiffOperand);
    984         // Make sure we do a signed comparison here.
    985         return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV);
    986       }
    987     }
    988 
    989     // Only lower this if the icmp is the only user of the GEP or if we expect
    990     // the result to fold to a constant!
    991     if (GEPsInBounds && (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
    992         (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
    993       // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)  --->  (OFFSET1 cmp OFFSET2)
    994       Value *L = EmitGEPOffset(GEPLHS);
    995       Value *R = EmitGEPOffset(GEPRHS);
    996       return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
    997     }
    998   }
    999 
   1000   // Try convert this to an indexed compare by looking through PHIs/casts as a
   1001   // last resort.
   1002   return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
   1003 }
   1004 
   1005 Instruction *InstCombiner::foldAllocaCmp(ICmpInst &ICI,
   1006                                          const AllocaInst *Alloca,
   1007                                          const Value *Other) {
   1008   assert(ICI.isEquality() && "Cannot fold non-equality comparison.");
   1009 
   1010   // It would be tempting to fold away comparisons between allocas and any
   1011   // pointer not based on that alloca (e.g. an argument). However, even
   1012   // though such pointers cannot alias, they can still compare equal.
   1013   //
   1014   // But LLVM doesn't specify where allocas get their memory, so if the alloca
   1015   // doesn't escape we can argue that it's impossible to guess its value, and we
   1016   // can therefore act as if any such guesses are wrong.
   1017   //
   1018   // The code below checks that the alloca doesn't escape, and that it's only
   1019   // used in a comparison once (the current instruction). The
   1020   // single-comparison-use condition ensures that we're trivially folding all
   1021   // comparisons against the alloca consistently, and avoids the risk of
   1022   // erroneously folding a comparison of the pointer with itself.
   1023 
   1024   unsigned MaxIter = 32; // Break cycles and bound to constant-time.
   1025 
   1026   SmallVector<const Use *, 32> Worklist;
   1027   for (const Use &U : Alloca->uses()) {
   1028     if (Worklist.size() >= MaxIter)
   1029       return nullptr;
   1030     Worklist.push_back(&U);
   1031   }
   1032 
   1033   unsigned NumCmps = 0;
   1034   while (!Worklist.empty()) {
   1035     assert(Worklist.size() <= MaxIter);
   1036     const Use *U = Worklist.pop_back_val();
   1037     const Value *V = U->getUser();
   1038     --MaxIter;
   1039 
   1040     if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V) || isa<PHINode>(V) ||
   1041         isa<SelectInst>(V)) {
   1042       // Track the uses.
   1043     } else if (isa<LoadInst>(V)) {
   1044       // Loading from the pointer doesn't escape it.
   1045       continue;
   1046     } else if (const auto *SI = dyn_cast<StoreInst>(V)) {
   1047       // Storing *to* the pointer is fine, but storing the pointer escapes it.
   1048       if (SI->getValueOperand() == U->get())
   1049         return nullptr;
   1050       continue;
   1051     } else if (isa<ICmpInst>(V)) {
   1052       if (NumCmps++)
   1053         return nullptr; // Found more than one cmp.
   1054       continue;
   1055     } else if (const auto *Intrin = dyn_cast<IntrinsicInst>(V)) {
   1056       switch (Intrin->getIntrinsicID()) {
   1057         // These intrinsics don't escape or compare the pointer. Memset is safe
   1058         // because we don't allow ptrtoint. Memcpy and memmove are safe because
   1059         // we don't allow stores, so src cannot point to V.
   1060         case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
   1061         case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset:
   1062           continue;
   1063         default:
   1064           return nullptr;
   1065       }
   1066     } else {
   1067       return nullptr;
   1068     }
   1069     for (const Use &U : V->uses()) {
   1070       if (Worklist.size() >= MaxIter)
   1071         return nullptr;
   1072       Worklist.push_back(&U);
   1073     }
   1074   }
   1075 
   1076   Type *CmpTy = CmpInst::makeCmpResultType(Other->getType());
   1077   return replaceInstUsesWith(
   1078       ICI,
   1079       ConstantInt::get(CmpTy, !CmpInst::isTrueWhenEqual(ICI.getPredicate())));
   1080 }
   1081 
   1082 /// Fold "icmp pred (X+CI), X".
   1083 Instruction *InstCombiner::foldICmpAddOpConst(Value *X, ConstantInt *CI,
   1084                                               ICmpInst::Predicate Pred) {
   1085   // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
   1086   // so the values can never be equal.  Similarly for all other "or equals"
   1087   // operators.
   1088 
   1089   // (X+1) <u X        --> X >u (MAXUINT-1)        --> X == 255
   1090   // (X+2) <u X        --> X >u (MAXUINT-2)        --> X > 253
   1091   // (X+MAXUINT) <u X  --> X >u (MAXUINT-MAXUINT)  --> X != 0
   1092   if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
   1093     Value *R =
   1094       ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI);
   1095     return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
   1096   }
   1097 
   1098   // (X+1) >u X        --> X <u (0-1)        --> X != 255
   1099   // (X+2) >u X        --> X <u (0-2)        --> X <u 254
   1100   // (X+MAXUINT) >u X  --> X <u (0-MAXUINT)  --> X <u 1  --> X == 0
   1101   if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
   1102     return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
   1103 
   1104   unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
   1105   ConstantInt *SMax = ConstantInt::get(X->getContext(),
   1106                                        APInt::getSignedMaxValue(BitWidth));
   1107 
   1108   // (X+ 1) <s X       --> X >s (MAXSINT-1)          --> X == 127
   1109   // (X+ 2) <s X       --> X >s (MAXSINT-2)          --> X >s 125
   1110   // (X+MAXSINT) <s X  --> X >s (MAXSINT-MAXSINT)    --> X >s 0
   1111   // (X+MINSINT) <s X  --> X >s (MAXSINT-MINSINT)    --> X >s -1
   1112   // (X+ -2) <s X      --> X >s (MAXSINT- -2)        --> X >s 126
   1113   // (X+ -1) <s X      --> X >s (MAXSINT- -1)        --> X != 127
   1114   if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
   1115     return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
   1116 
   1117   // (X+ 1) >s X       --> X <s (MAXSINT-(1-1))       --> X != 127
   1118   // (X+ 2) >s X       --> X <s (MAXSINT-(2-1))       --> X <s 126
   1119   // (X+MAXSINT) >s X  --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
   1120   // (X+MINSINT) >s X  --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
   1121   // (X+ -2) >s X      --> X <s (MAXSINT-(-2-1))      --> X <s -126
   1122   // (X+ -1) >s X      --> X <s (MAXSINT-(-1-1))      --> X == -128
   1123 
   1124   assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
   1125   Constant *C = Builder.getInt(CI->getValue() - 1);
   1126   return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
   1127 }
   1128 
   1129 /// Handle "(icmp eq/ne (ashr/lshr AP2, A), AP1)" ->
   1130 /// (icmp eq/ne A, Log2(AP2/AP1)) ->
   1131 /// (icmp eq/ne A, Log2(AP2) - Log2(AP1)).
   1132 Instruction *InstCombiner::foldICmpShrConstConst(ICmpInst &I, Value *A,
   1133                                                  const APInt &AP1,
   1134                                                  const APInt &AP2) {
   1135   assert(I.isEquality() && "Cannot fold icmp gt/lt");
   1136 
   1137   auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
   1138     if (I.getPredicate() == I.ICMP_NE)
   1139       Pred = CmpInst::getInversePredicate(Pred);
   1140     return new ICmpInst(Pred, LHS, RHS);
   1141   };
   1142 
   1143   // Don't bother doing any work for cases which InstSimplify handles.
   1144   if (AP2.isNullValue())
   1145     return nullptr;
   1146 
   1147   bool IsAShr = isa<AShrOperator>(I.getOperand(0));
   1148   if (IsAShr) {
   1149     if (AP2.isAllOnesValue())
   1150       return nullptr;
   1151     if (AP2.isNegative() != AP1.isNegative())
   1152       return nullptr;
   1153     if (AP2.sgt(AP1))
   1154       return nullptr;
   1155   }
   1156 
   1157   if (!AP1)
   1158     // 'A' must be large enough to shift out the highest set bit.
   1159     return getICmp(I.ICMP_UGT, A,
   1160                    ConstantInt::get(A->getType(), AP2.logBase2()));
   1161 
   1162   if (AP1 == AP2)
   1163     return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
   1164 
   1165   int Shift;
   1166   if (IsAShr && AP1.isNegative())
   1167     Shift = AP1.countLeadingOnes() - AP2.countLeadingOnes();
   1168   else
   1169     Shift = AP1.countLeadingZeros() - AP2.countLeadingZeros();
   1170 
   1171   if (Shift > 0) {
   1172     if (IsAShr && AP1 == AP2.ashr(Shift)) {
   1173       // There are multiple solutions if we are comparing against -1 and the LHS
   1174       // of the ashr is not a power of two.
   1175       if (AP1.isAllOnesValue() && !AP2.isPowerOf2())
   1176         return getICmp(I.ICMP_UGE, A, ConstantInt::get(A->getType(), Shift));
   1177       return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
   1178     } else if (AP1 == AP2.lshr(Shift)) {
   1179       return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
   1180     }
   1181   }
   1182 
   1183   // Shifting const2 will never be equal to const1.
   1184   // FIXME: This should always be handled by InstSimplify?
   1185   auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE);
   1186   return replaceInstUsesWith(I, TorF);
   1187 }
   1188 
   1189 /// Handle "(icmp eq/ne (shl AP2, A), AP1)" ->
   1190 /// (icmp eq/ne A, TrailingZeros(AP1) - TrailingZeros(AP2)).
   1191 Instruction *InstCombiner::foldICmpShlConstConst(ICmpInst &I, Value *A,
   1192                                                  const APInt &AP1,
   1193                                                  const APInt &AP2) {
   1194   assert(I.isEquality() && "Cannot fold icmp gt/lt");
   1195 
   1196   auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
   1197     if (I.getPredicate() == I.ICMP_NE)
   1198       Pred = CmpInst::getInversePredicate(Pred);
   1199     return new ICmpInst(Pred, LHS, RHS);
   1200   };
   1201 
   1202   // Don't bother doing any work for cases which InstSimplify handles.
   1203   if (AP2.isNullValue())
   1204     return nullptr;
   1205 
   1206   unsigned AP2TrailingZeros = AP2.countTrailingZeros();
   1207 
   1208   if (!AP1 && AP2TrailingZeros != 0)
   1209     return getICmp(
   1210         I.ICMP_UGE, A,
   1211         ConstantInt::get(A->getType(), AP2.getBitWidth() - AP2TrailingZeros));
   1212 
   1213   if (AP1 == AP2)
   1214     return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
   1215 
   1216   // Get the distance between the lowest bits that are set.
   1217   int Shift = AP1.countTrailingZeros() - AP2TrailingZeros;
   1218 
   1219   if (Shift > 0 && AP2.shl(Shift) == AP1)
   1220     return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
   1221 
   1222   // Shifting const2 will never be equal to const1.
   1223   // FIXME: This should always be handled by InstSimplify?
   1224   auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE);
   1225   return replaceInstUsesWith(I, TorF);
   1226 }
   1227 
   1228 /// The caller has matched a pattern of the form:
   1229 ///   I = icmp ugt (add (add A, B), CI2), CI1
   1230 /// If this is of the form:
   1231 ///   sum = a + b
   1232 ///   if (sum+128 >u 255)
   1233 /// Then replace it with llvm.sadd.with.overflow.i8.
   1234 ///
   1235 static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
   1236                                           ConstantInt *CI2, ConstantInt *CI1,
   1237                                           InstCombiner &IC) {
   1238   // The transformation we're trying to do here is to transform this into an
   1239   // llvm.sadd.with.overflow.  To do this, we have to replace the original add
   1240   // with a narrower add, and discard the add-with-constant that is part of the
   1241   // range check (if we can't eliminate it, this isn't profitable).
   1242 
   1243   // In order to eliminate the add-with-constant, the compare can be its only
   1244   // use.
   1245   Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
   1246   if (!AddWithCst->hasOneUse())
   1247     return nullptr;
   1248 
   1249   // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
   1250   if (!CI2->getValue().isPowerOf2())
   1251     return nullptr;
   1252   unsigned NewWidth = CI2->getValue().countTrailingZeros();
   1253   if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31)
   1254     return nullptr;
   1255 
   1256   // The width of the new add formed is 1 more than the bias.
   1257   ++NewWidth;
   1258 
   1259   // Check to see that CI1 is an all-ones value with NewWidth bits.
   1260   if (CI1->getBitWidth() == NewWidth ||
   1261       CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
   1262     return nullptr;
   1263 
   1264   // This is only really a signed overflow check if the inputs have been
   1265   // sign-extended; check for that condition. For example, if CI2 is 2^31 and
   1266   // the operands of the add are 64 bits wide, we need at least 33 sign bits.
   1267   unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1;
   1268   if (IC.ComputeNumSignBits(A, 0, &I) < NeededSignBits ||
   1269       IC.ComputeNumSignBits(B, 0, &I) < NeededSignBits)
   1270     return nullptr;
   1271 
   1272   // In order to replace the original add with a narrower
   1273   // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
   1274   // and truncates that discard the high bits of the add.  Verify that this is
   1275   // the case.
   1276   Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0));
   1277   for (User *U : OrigAdd->users()) {
   1278     if (U == AddWithCst)
   1279       continue;
   1280 
   1281     // Only accept truncates for now.  We would really like a nice recursive
   1282     // predicate like SimplifyDemandedBits, but which goes downwards the use-def
   1283     // chain to see which bits of a value are actually demanded.  If the
   1284     // original add had another add which was then immediately truncated, we
   1285     // could still do the transformation.
   1286     TruncInst *TI = dyn_cast<TruncInst>(U);
   1287     if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth)
   1288       return nullptr;
   1289   }
   1290 
   1291   // If the pattern matches, truncate the inputs to the narrower type and
   1292   // use the sadd_with_overflow intrinsic to efficiently compute both the
   1293   // result and the overflow bit.
   1294   Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
   1295   Value *F = Intrinsic::getDeclaration(I.getModule(),
   1296                                        Intrinsic::sadd_with_overflow, NewType);
   1297 
   1298   InstCombiner::BuilderTy &Builder = IC.Builder;
   1299 
   1300   // Put the new code above the original add, in case there are any uses of the
   1301   // add between the add and the compare.
   1302   Builder.SetInsertPoint(OrigAdd);
   1303 
   1304   Value *TruncA = Builder.CreateTrunc(A, NewType, A->getName() + ".trunc");
   1305   Value *TruncB = Builder.CreateTrunc(B, NewType, B->getName() + ".trunc");
   1306   CallInst *Call = Builder.CreateCall(F, {TruncA, TruncB}, "sadd");
   1307   Value *Add = Builder.CreateExtractValue(Call, 0, "sadd.result");
   1308   Value *ZExt = Builder.CreateZExt(Add, OrigAdd->getType());
   1309 
   1310   // The inner add was the result of the narrow add, zero extended to the
   1311   // wider type.  Replace it with the result computed by the intrinsic.
   1312   IC.replaceInstUsesWith(*OrigAdd, ZExt);
   1313 
   1314   // The original icmp gets replaced with the overflow value.
   1315   return ExtractValueInst::Create(Call, 1, "sadd.overflow");
   1316 }
   1317 
   1318 // Handle (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
   1319 Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
   1320   CmpInst::Predicate Pred = Cmp.getPredicate();
   1321   Value *X = Cmp.getOperand(0);
   1322 
   1323   if (match(Cmp.getOperand(1), m_Zero()) && Pred == ICmpInst::ICMP_SGT) {
   1324     Value *A, *B;
   1325     SelectPatternResult SPR = matchSelectPattern(X, A, B);
   1326     if (SPR.Flavor == SPF_SMIN) {
   1327       if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT))
   1328         return new ICmpInst(Pred, B, Cmp.getOperand(1));
   1329       if (isKnownPositive(B, DL, 0, &AC, &Cmp, &DT))
   1330         return new ICmpInst(Pred, A, Cmp.getOperand(1));
   1331     }
   1332   }
   1333   return nullptr;
   1334 }
   1335 
   1336 // Fold icmp Pred X, C.
   1337 Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
   1338   CmpInst::Predicate Pred = Cmp.getPredicate();
   1339   Value *X = Cmp.getOperand(0);
   1340 
   1341   const APInt *C;
   1342   if (!match(Cmp.getOperand(1), m_APInt(C)))
   1343     return nullptr;
   1344 
   1345   Value *A = nullptr, *B = nullptr;
   1346 
   1347   // Match the following pattern, which is a common idiom when writing
   1348   // overflow-safe integer arithmetic functions. The source performs an addition
   1349   // in wider type and explicitly checks for overflow using comparisons against
   1350   // INT_MIN and INT_MAX. Simplify by using the sadd_with_overflow intrinsic.
   1351   //
   1352   // TODO: This could probably be generalized to handle other overflow-safe
   1353   // operations if we worked out the formulas to compute the appropriate magic
   1354   // constants.
   1355   //
   1356   // sum = a + b
   1357   // if (sum+128 >u 255)  ...  -> llvm.sadd.with.overflow.i8
   1358   {
   1359     ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI
   1360     if (Pred == ICmpInst::ICMP_UGT &&
   1361         match(X, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
   1362       if (Instruction *Res = processUGT_ADDCST_ADD(
   1363               Cmp, A, B, CI2, cast<ConstantInt>(Cmp.getOperand(1)), *this))
   1364         return Res;
   1365   }
   1366 
   1367   // FIXME: Use m_APInt to allow folds for splat constants.
   1368   ConstantInt *CI = dyn_cast<ConstantInt>(Cmp.getOperand(1));
   1369   if (!CI)
   1370     return nullptr;
   1371 
   1372   // Canonicalize icmp instructions based on dominating conditions.
   1373   BasicBlock *Parent = Cmp.getParent();
   1374   BasicBlock *Dom = Parent->getSinglePredecessor();
   1375   auto *BI = Dom ? dyn_cast<BranchInst>(Dom->getTerminator()) : nullptr;
   1376   ICmpInst::Predicate Pred2;
   1377   BasicBlock *TrueBB, *FalseBB;
   1378   ConstantInt *CI2;
   1379   if (BI && match(BI, m_Br(m_ICmp(Pred2, m_Specific(X), m_ConstantInt(CI2)),
   1380                            TrueBB, FalseBB)) &&
   1381       TrueBB != FalseBB) {
   1382     ConstantRange CR =
   1383         ConstantRange::makeAllowedICmpRegion(Pred, CI->getValue());
   1384     ConstantRange DominatingCR =
   1385         (Parent == TrueBB)
   1386             ? ConstantRange::makeExactICmpRegion(Pred2, CI2->getValue())
   1387             : ConstantRange::makeExactICmpRegion(
   1388                   CmpInst::getInversePredicate(Pred2), CI2->getValue());
   1389     ConstantRange Intersection = DominatingCR.intersectWith(CR);
   1390     ConstantRange Difference = DominatingCR.difference(CR);
   1391     if (Intersection.isEmptySet())
   1392       return replaceInstUsesWith(Cmp, Builder.getFalse());
   1393     if (Difference.isEmptySet())
   1394       return replaceInstUsesWith(Cmp, Builder.getTrue());
   1395 
   1396     // If this is a normal comparison, it demands all bits. If it is a sign
   1397     // bit comparison, it only demands the sign bit.
   1398     bool UnusedBit;
   1399     bool IsSignBit = isSignBitCheck(Pred, CI->getValue(), UnusedBit);
   1400 
   1401     // Canonicalizing a sign bit comparison that gets used in a branch,
   1402     // pessimizes codegen by generating branch on zero instruction instead
   1403     // of a test and branch. So we avoid canonicalizing in such situations
   1404     // because test and branch instruction has better branch displacement
   1405     // than compare and branch instruction.
   1406     if (Cmp.isEquality() || (IsSignBit && hasBranchUse(Cmp)))
   1407       return nullptr;
   1408 
   1409     if (auto *AI = Intersection.getSingleElement())
   1410       return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*AI));
   1411     if (auto *AD = Difference.getSingleElement())
   1412       return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*AD));
   1413   }
   1414 
   1415   return nullptr;
   1416 }
   1417 
   1418 /// Fold icmp (trunc X, Y), C.
   1419 Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp,
   1420                                                  TruncInst *Trunc,
   1421                                                  const APInt &C) {
   1422   ICmpInst::Predicate Pred = Cmp.getPredicate();
   1423   Value *X = Trunc->getOperand(0);
   1424   if (C.isOneValue() && C.getBitWidth() > 1) {
   1425     // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1
   1426     Value *V = nullptr;
   1427     if (Pred == ICmpInst::ICMP_SLT && match(X, m_Signum(m_Value(V))))
   1428       return new ICmpInst(ICmpInst::ICMP_SLT, V,
   1429                           ConstantInt::get(V->getType(), 1));
   1430   }
   1431 
   1432   if (Cmp.isEquality() && Trunc->hasOneUse()) {
   1433     // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
   1434     // of the high bits truncated out of x are known.
   1435     unsigned DstBits = Trunc->getType()->getScalarSizeInBits(),
   1436              SrcBits = X->getType()->getScalarSizeInBits();
   1437     KnownBits Known = computeKnownBits(X, 0, &Cmp);
   1438 
   1439     // If all the high bits are known, we can do this xform.
   1440     if ((Known.Zero | Known.One).countLeadingOnes() >= SrcBits - DstBits) {
   1441       // Pull in the high bits from known-ones set.
   1442       APInt NewRHS = C.zext(SrcBits);
   1443       NewRHS |= Known.One & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits);
   1444       return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), NewRHS));
   1445     }
   1446   }
   1447 
   1448   return nullptr;
   1449 }
   1450 
   1451 /// Fold icmp (xor X, Y), C.
   1452 Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp,
   1453                                                BinaryOperator *Xor,
   1454                                                const APInt &C) {
   1455   Value *X = Xor->getOperand(0);
   1456   Value *Y = Xor->getOperand(1);
   1457   const APInt *XorC;
   1458   if (!match(Y, m_APInt(XorC)))
   1459     return nullptr;
   1460 
   1461   // If this is a comparison that tests the signbit (X < 0) or (x > -1),
   1462   // fold the xor.
   1463   ICmpInst::Predicate Pred = Cmp.getPredicate();
   1464   bool TrueIfSigned = false;
   1465   if (isSignBitCheck(Cmp.getPredicate(), C, TrueIfSigned)) {
   1466 
   1467     // If the sign bit of the XorCst is not set, there is no change to
   1468     // the operation, just stop using the Xor.
   1469     if (!XorC->isNegative()) {
   1470       Cmp.setOperand(0, X);
   1471       Worklist.Add(Xor);
   1472       return &Cmp;
   1473     }
   1474 
   1475     // Emit the opposite comparison.
   1476     if (TrueIfSigned)
   1477       return new ICmpInst(ICmpInst::ICMP_SGT, X,
   1478                           ConstantInt::getAllOnesValue(X->getType()));
   1479     else
   1480       return new ICmpInst(ICmpInst::ICMP_SLT, X,
   1481                           ConstantInt::getNullValue(X->getType()));
   1482   }
   1483 
   1484   if (Xor->hasOneUse()) {
   1485     // (icmp u/s (xor X SignMask), C) -> (icmp s/u X, (xor C SignMask))
   1486     if (!Cmp.isEquality() && XorC->isSignMask()) {
   1487       Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate()
   1488                             : Cmp.getSignedPredicate();
   1489       return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC));
   1490     }
   1491 
   1492     // (icmp u/s (xor X ~SignMask), C) -> (icmp s/u X, (xor C ~SignMask))
   1493     if (!Cmp.isEquality() && XorC->isMaxSignedValue()) {
   1494       Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate()
   1495                             : Cmp.getSignedPredicate();
   1496       Pred = Cmp.getSwappedPredicate(Pred);
   1497       return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC));
   1498     }
   1499   }
   1500 
   1501   // (icmp ugt (xor X, C), ~C) -> (icmp ult X, C)
   1502   //   iff -C is a power of 2
   1503   if (Pred == ICmpInst::ICMP_UGT && *XorC == ~C && (C + 1).isPowerOf2())
   1504     return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
   1505 
   1506   // (icmp ult (xor X, C), -C) -> (icmp uge X, C)
   1507   //   iff -C is a power of 2
   1508   if (Pred == ICmpInst::ICMP_ULT && *XorC == -C && C.isPowerOf2())
   1509     return new ICmpInst(ICmpInst::ICMP_UGE, X, Y);
   1510 
   1511   return nullptr;
   1512 }
   1513 
   1514 /// Fold icmp (and (sh X, Y), C2), C1.
   1515 Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And,
   1516                                             const APInt &C1, const APInt &C2) {
   1517   BinaryOperator *Shift = dyn_cast<BinaryOperator>(And->getOperand(0));
   1518   if (!Shift || !Shift->isShift())
   1519     return nullptr;
   1520 
   1521   // If this is: (X >> C3) & C2 != C1 (where any shift and any compare could
   1522   // exist), turn it into (X & (C2 << C3)) != (C1 << C3). This happens a LOT in
   1523   // code produced by the clang front-end, for bitfield access.
   1524   // This seemingly simple opportunity to fold away a shift turns out to be
   1525   // rather complicated. See PR17827 for details.
   1526   unsigned ShiftOpcode = Shift->getOpcode();
   1527   bool IsShl = ShiftOpcode == Instruction::Shl;
   1528   const APInt *C3;
   1529   if (match(Shift->getOperand(1), m_APInt(C3))) {
   1530     bool CanFold = false;
   1531     if (ShiftOpcode == Instruction::Shl) {
   1532       // For a left shift, we can fold if the comparison is not signed. We can
   1533       // also fold a signed comparison if the mask value and comparison value
   1534       // are not negative. These constraints may not be obvious, but we can
   1535       // prove that they are correct using an SMT solver.
   1536       if (!Cmp.isSigned() || (!C2.isNegative() && !C1.isNegative()))
   1537         CanFold = true;
   1538     } else {
   1539       bool IsAshr = ShiftOpcode == Instruction::AShr;
   1540       // For a logical right shift, we can fold if the comparison is not signed.
   1541       // We can also fold a signed comparison if the shifted mask value and the
   1542       // shifted comparison value are not negative. These constraints may not be
   1543       // obvious, but we can prove that they are correct using an SMT solver.
   1544       // For an arithmetic shift right we can do the same, if we ensure
   1545       // the And doesn't use any bits being shifted in. Normally these would
   1546       // be turned into lshr by SimplifyDemandedBits, but not if there is an
   1547       // additional user.
   1548       if (!IsAshr || (C2.shl(*C3).lshr(*C3) == C2)) {
   1549         if (!Cmp.isSigned() ||
   1550             (!C2.shl(*C3).isNegative() && !C1.shl(*C3).isNegative()))
   1551           CanFold = true;
   1552       }
   1553     }
   1554 
   1555     if (CanFold) {
   1556       APInt NewCst = IsShl ? C1.lshr(*C3) : C1.shl(*C3);
   1557       APInt SameAsC1 = IsShl ? NewCst.shl(*C3) : NewCst.lshr(*C3);
   1558       // Check to see if we are shifting out any of the bits being compared.
   1559       if (SameAsC1 != C1) {
   1560         // If we shifted bits out, the fold is not going to work out. As a
   1561         // special case, check to see if this means that the result is always
   1562         // true or false now.
   1563         if (Cmp.getPredicate() == ICmpInst::ICMP_EQ)
   1564           return replaceInstUsesWith(Cmp, ConstantInt::getFalse(Cmp.getType()));
   1565         if (Cmp.getPredicate() == ICmpInst::ICMP_NE)
   1566           return replaceInstUsesWith(Cmp, ConstantInt::getTrue(Cmp.getType()));
   1567       } else {
   1568         Cmp.setOperand(1, ConstantInt::get(And->getType(), NewCst));
   1569         APInt NewAndCst = IsShl ? C2.lshr(*C3) : C2.shl(*C3);
   1570         And->setOperand(1, ConstantInt::get(And->getType(), NewAndCst));
   1571         And->setOperand(0, Shift->getOperand(0));
   1572         Worklist.Add(Shift); // Shift is dead.
   1573         return &Cmp;
   1574       }
   1575     }
   1576   }
   1577 
   1578   // Turn ((X >> Y) & C2) == 0  into  (X & (C2 << Y)) == 0.  The latter is
   1579   // preferable because it allows the C2 << Y expression to be hoisted out of a
   1580   // loop if Y is invariant and X is not.
   1581   if (Shift->hasOneUse() && C1.isNullValue() && Cmp.isEquality() &&
   1582       !Shift->isArithmeticShift() && !isa<Constant>(Shift->getOperand(0))) {
   1583     // Compute C2 << Y.
   1584     Value *NewShift =
   1585         IsShl ? Builder.CreateLShr(And->getOperand(1), Shift->getOperand(1))
   1586               : Builder.CreateShl(And->getOperand(1), Shift->getOperand(1));
   1587 
   1588     // Compute X & (C2 << Y).
   1589     Value *NewAnd = Builder.CreateAnd(Shift->getOperand(0), NewShift);
   1590     Cmp.setOperand(0, NewAnd);
   1591     return &Cmp;
   1592   }
   1593 
   1594   return nullptr;
   1595 }
   1596 
   1597 /// Fold icmp (and X, C2), C1.
   1598 Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp,
   1599                                                  BinaryOperator *And,
   1600                                                  const APInt &C1) {
   1601   const APInt *C2;
   1602   if (!match(And->getOperand(1), m_APInt(C2)))
   1603     return nullptr;
   1604 
   1605   if (!And->hasOneUse())
   1606     return nullptr;
   1607 
   1608   // If the LHS is an 'and' of a truncate and we can widen the and/compare to
   1609   // the input width without changing the value produced, eliminate the cast:
   1610   //
   1611   // icmp (and (trunc W), C2), C1 -> icmp (and W, C2'), C1'
   1612   //
   1613   // We can do this transformation if the constants do not have their sign bits
   1614   // set or if it is an equality comparison. Extending a relational comparison
   1615   // when we're checking the sign bit would not work.
   1616   Value *W;
   1617   if (match(And->getOperand(0), m_OneUse(m_Trunc(m_Value(W)))) &&
   1618       (Cmp.isEquality() || (!C1.isNegative() && !C2->isNegative()))) {
   1619     // TODO: Is this a good transform for vectors? Wider types may reduce
   1620     // throughput. Should this transform be limited (even for scalars) by using
   1621     // shouldChangeType()?
   1622     if (!Cmp.getType()->isVectorTy()) {
   1623       Type *WideType = W->getType();
   1624       unsigned WideScalarBits = WideType->getScalarSizeInBits();
   1625       Constant *ZextC1 = ConstantInt::get(WideType, C1.zext(WideScalarBits));
   1626       Constant *ZextC2 = ConstantInt::get(WideType, C2->zext(WideScalarBits));
   1627       Value *NewAnd = Builder.CreateAnd(W, ZextC2, And->getName());
   1628       return new ICmpInst(Cmp.getPredicate(), NewAnd, ZextC1);
   1629     }
   1630   }
   1631 
   1632   if (Instruction *I = foldICmpAndShift(Cmp, And, C1, *C2))
   1633     return I;
   1634 
   1635   // (icmp pred (and (or (lshr A, B), A), 1), 0) -->
   1636   // (icmp pred (and A, (or (shl 1, B), 1), 0))
   1637   //
   1638   // iff pred isn't signed
   1639   if (!Cmp.isSigned() && C1.isNullValue() && And->getOperand(0)->hasOneUse() &&
   1640       match(And->getOperand(1), m_One())) {
   1641     Constant *One = cast<Constant>(And->getOperand(1));
   1642     Value *Or = And->getOperand(0);
   1643     Value *A, *B, *LShr;
   1644     if (match(Or, m_Or(m_Value(LShr), m_Value(A))) &&
   1645         match(LShr, m_LShr(m_Specific(A), m_Value(B)))) {
   1646       unsigned UsesRemoved = 0;
   1647       if (And->hasOneUse())
   1648         ++UsesRemoved;
   1649       if (Or->hasOneUse())
   1650         ++UsesRemoved;
   1651       if (LShr->hasOneUse())
   1652         ++UsesRemoved;
   1653 
   1654       // Compute A & ((1 << B) | 1)
   1655       Value *NewOr = nullptr;
   1656       if (auto *C = dyn_cast<Constant>(B)) {
   1657         if (UsesRemoved >= 1)
   1658           NewOr = ConstantExpr::getOr(ConstantExpr::getNUWShl(One, C), One);
   1659       } else {
   1660         if (UsesRemoved >= 3)
   1661           NewOr = Builder.CreateOr(Builder.CreateShl(One, B, LShr->getName(),
   1662                                                      /*HasNUW=*/true),
   1663                                    One, Or->getName());
   1664       }
   1665       if (NewOr) {
   1666         Value *NewAnd = Builder.CreateAnd(A, NewOr, And->getName());
   1667         Cmp.setOperand(0, NewAnd);
   1668         return &Cmp;
   1669       }
   1670     }
   1671   }
   1672 
   1673   return nullptr;
   1674 }
   1675 
   1676 /// Fold icmp (and X, Y), C.
   1677 Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp,
   1678                                                BinaryOperator *And,
   1679                                                const APInt &C) {
   1680   if (Instruction *I = foldICmpAndConstConst(Cmp, And, C))
   1681     return I;
   1682 
   1683   // TODO: These all require that Y is constant too, so refactor with the above.
   1684 
   1685   // Try to optimize things like "A[i] & 42 == 0" to index computations.
   1686   Value *X = And->getOperand(0);
   1687   Value *Y = And->getOperand(1);
   1688   if (auto *LI = dyn_cast<LoadInst>(X))
   1689     if (auto *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
   1690       if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
   1691         if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
   1692             !LI->isVolatile() && isa<ConstantInt>(Y)) {
   1693           ConstantInt *C2 = cast<ConstantInt>(Y);
   1694           if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, Cmp, C2))
   1695             return Res;
   1696         }
   1697 
   1698   if (!Cmp.isEquality())
   1699     return nullptr;
   1700 
   1701   // X & -C == -C -> X >  u ~C
   1702   // X & -C != -C -> X <= u ~C
   1703   //   iff C is a power of 2
   1704   if (Cmp.getOperand(1) == Y && (-C).isPowerOf2()) {
   1705     auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT
   1706                                                           : CmpInst::ICMP_ULE;
   1707     return new ICmpInst(NewPred, X, SubOne(cast<Constant>(Cmp.getOperand(1))));
   1708   }
   1709 
   1710   // (X & C2) == 0 -> (trunc X) >= 0
   1711   // (X & C2) != 0 -> (trunc X) <  0
   1712   //   iff C2 is a power of 2 and it masks the sign bit of a legal integer type.
   1713   const APInt *C2;
   1714   if (And->hasOneUse() && C.isNullValue() && match(Y, m_APInt(C2))) {
   1715     int32_t ExactLogBase2 = C2->exactLogBase2();
   1716     if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) {
   1717       Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1);
   1718       if (And->getType()->isVectorTy())
   1719         NTy = VectorType::get(NTy, And->getType()->getVectorNumElements());
   1720       Value *Trunc = Builder.CreateTrunc(X, NTy);
   1721       auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_SGE
   1722                                                             : CmpInst::ICMP_SLT;
   1723       return new ICmpInst(NewPred, Trunc, Constant::getNullValue(NTy));
   1724     }
   1725   }
   1726 
   1727   return nullptr;
   1728 }
   1729 
   1730 /// Fold icmp (or X, Y), C.
   1731 Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or,
   1732                                               const APInt &C) {
   1733   ICmpInst::Predicate Pred = Cmp.getPredicate();
   1734   if (C.isOneValue()) {
   1735     // icmp slt signum(V) 1 --> icmp slt V, 1
   1736     Value *V = nullptr;
   1737     if (Pred == ICmpInst::ICMP_SLT && match(Or, m_Signum(m_Value(V))))
   1738       return new ICmpInst(ICmpInst::ICMP_SLT, V,
   1739                           ConstantInt::get(V->getType(), 1));
   1740   }
   1741 
   1742   // X | C == C --> X <=u C
   1743   // X | C != C --> X  >u C
   1744   //   iff C+1 is a power of 2 (C is a bitmask of the low bits)
   1745   if (Cmp.isEquality() && Cmp.getOperand(1) == Or->getOperand(1) &&
   1746       (C + 1).isPowerOf2()) {
   1747     Pred = (Pred == CmpInst::ICMP_EQ) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
   1748     return new ICmpInst(Pred, Or->getOperand(0), Or->getOperand(1));
   1749   }
   1750 
   1751   if (!Cmp.isEquality() || !C.isNullValue() || !Or->hasOneUse())
   1752     return nullptr;
   1753 
   1754   Value *P, *Q;
   1755   if (match(Or, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
   1756     // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
   1757     // -> and (icmp eq P, null), (icmp eq Q, null).
   1758     Value *CmpP =
   1759         Builder.CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType()));
   1760     Value *CmpQ =
   1761         Builder.CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType()));
   1762     auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
   1763     return BinaryOperator::Create(BOpc, CmpP, CmpQ);
   1764   }
   1765 
   1766   // Are we using xors to bitwise check for a pair of (in)equalities? Convert to
   1767   // a shorter form that has more potential to be folded even further.
   1768   Value *X1, *X2, *X3, *X4;
   1769   if (match(Or->getOperand(0), m_OneUse(m_Xor(m_Value(X1), m_Value(X2)))) &&
   1770       match(Or->getOperand(1), m_OneUse(m_Xor(m_Value(X3), m_Value(X4))))) {
   1771     // ((X1 ^ X2) || (X3 ^ X4)) == 0 --> (X1 == X2) && (X3 == X4)
   1772     // ((X1 ^ X2) || (X3 ^ X4)) != 0 --> (X1 != X2) || (X3 != X4)
   1773     Value *Cmp12 = Builder.CreateICmp(Pred, X1, X2);
   1774     Value *Cmp34 = Builder.CreateICmp(Pred, X3, X4);
   1775     auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
   1776     return BinaryOperator::Create(BOpc, Cmp12, Cmp34);
   1777   }
   1778 
   1779   return nullptr;
   1780 }
   1781 
   1782 /// Fold icmp (mul X, Y), C.
   1783 Instruction *InstCombiner::foldICmpMulConstant(ICmpInst &Cmp,
   1784                                                BinaryOperator *Mul,
   1785                                                const APInt &C) {
   1786   const APInt *MulC;
   1787   if (!match(Mul->getOperand(1), m_APInt(MulC)))
   1788     return nullptr;
   1789 
   1790   // If this is a test of the sign bit and the multiply is sign-preserving with
   1791   // a constant operand, use the multiply LHS operand instead.
   1792   ICmpInst::Predicate Pred = Cmp.getPredicate();
   1793   if (isSignTest(Pred, C) && Mul->hasNoSignedWrap()) {
   1794     if (MulC->isNegative())
   1795       Pred = ICmpInst::getSwappedPredicate(Pred);
   1796     return new ICmpInst(Pred, Mul->getOperand(0),
   1797                         Constant::getNullValue(Mul->getType()));
   1798   }
   1799 
   1800   return nullptr;
   1801 }
   1802 
   1803 /// Fold icmp (shl 1, Y), C.
   1804 static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl,
   1805                                    const APInt &C) {
   1806   Value *Y;
   1807   if (!match(Shl, m_Shl(m_One(), m_Value(Y))))
   1808     return nullptr;
   1809 
   1810   Type *ShiftType = Shl->getType();
   1811   unsigned TypeBits = C.getBitWidth();
   1812   bool CIsPowerOf2 = C.isPowerOf2();
   1813   ICmpInst::Predicate Pred = Cmp.getPredicate();
   1814   if (Cmp.isUnsigned()) {
   1815     // (1 << Y) pred C -> Y pred Log2(C)
   1816     if (!CIsPowerOf2) {
   1817       // (1 << Y) <  30 -> Y <= 4
   1818       // (1 << Y) <= 30 -> Y <= 4
   1819       // (1 << Y) >= 30 -> Y >  4
   1820       // (1 << Y) >  30 -> Y >  4
   1821       if (Pred == ICmpInst::ICMP_ULT)
   1822         Pred = ICmpInst::ICMP_ULE;
   1823       else if (Pred == ICmpInst::ICMP_UGE)
   1824         Pred = ICmpInst::ICMP_UGT;
   1825     }
   1826 
   1827     // (1 << Y) >= 2147483648 -> Y >= 31 -> Y == 31
   1828     // (1 << Y) <  2147483648 -> Y <  31 -> Y != 31
   1829     unsigned CLog2 = C.logBase2();
   1830     if (CLog2 == TypeBits - 1) {
   1831       if (Pred == ICmpInst::ICMP_UGE)
   1832         Pred = ICmpInst::ICMP_EQ;
   1833       else if (Pred == ICmpInst::ICMP_ULT)
   1834         Pred = ICmpInst::ICMP_NE;
   1835     }
   1836     return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, CLog2));
   1837   } else if (Cmp.isSigned()) {
   1838     Constant *BitWidthMinusOne = ConstantInt::get(ShiftType, TypeBits - 1);
   1839     if (C.isAllOnesValue()) {
   1840       // (1 << Y) <= -1 -> Y == 31
   1841       if (Pred == ICmpInst::ICMP_SLE)
   1842         return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne);
   1843 
   1844       // (1 << Y) >  -1 -> Y != 31
   1845       if (Pred == ICmpInst::ICMP_SGT)
   1846         return new ICmpInst(ICmpInst::ICMP_NE, Y, BitWidthMinusOne);
   1847     } else if (!C) {
   1848       // (1 << Y) <  0 -> Y == 31
   1849       // (1 << Y) <= 0 -> Y == 31
   1850       if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
   1851         return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne);
   1852 
   1853       // (1 << Y) >= 0 -> Y != 31
   1854       // (1 << Y) >  0 -> Y != 31
   1855       if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
   1856         return new ICmpInst(ICmpInst::ICMP_NE, Y, BitWidthMinusOne);
   1857     }
   1858   } else if (Cmp.isEquality() && CIsPowerOf2) {
   1859     return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, C.logBase2()));
   1860   }
   1861 
   1862   return nullptr;
   1863 }
   1864 
   1865 /// Fold icmp (shl X, Y), C.
   1866 Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
   1867                                                BinaryOperator *Shl,
   1868                                                const APInt &C) {
   1869   const APInt *ShiftVal;
   1870   if (Cmp.isEquality() && match(Shl->getOperand(0), m_APInt(ShiftVal)))
   1871     return foldICmpShlConstConst(Cmp, Shl->getOperand(1), C, *ShiftVal);
   1872 
   1873   const APInt *ShiftAmt;
   1874   if (!match(Shl->getOperand(1), m_APInt(ShiftAmt)))
   1875     return foldICmpShlOne(Cmp, Shl, C);
   1876 
   1877   // Check that the shift amount is in range. If not, don't perform undefined
   1878   // shifts. When the shift is visited, it will be simplified.
   1879   unsigned TypeBits = C.getBitWidth();
   1880   if (ShiftAmt->uge(TypeBits))
   1881     return nullptr;
   1882 
   1883   ICmpInst::Predicate Pred = Cmp.getPredicate();
   1884   Value *X = Shl->getOperand(0);
   1885   Type *ShType = Shl->getType();
   1886 
   1887   // NSW guarantees that we are only shifting out sign bits from the high bits,
   1888   // so we can ASHR the compare constant without needing a mask and eliminate
   1889   // the shift.
   1890   if (Shl->hasNoSignedWrap()) {
   1891     if (Pred == ICmpInst::ICMP_SGT) {
   1892       // icmp Pred (shl nsw X, ShiftAmt), C --> icmp Pred X, (C >>s ShiftAmt)
   1893       APInt ShiftedC = C.ashr(*ShiftAmt);
   1894       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
   1895     }
   1896     if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
   1897         C.ashr(*ShiftAmt).shl(*ShiftAmt) == C) {
   1898       APInt ShiftedC = C.ashr(*ShiftAmt);
   1899       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
   1900     }
   1901     if (Pred == ICmpInst::ICMP_SLT) {
   1902       // SLE is the same as above, but SLE is canonicalized to SLT, so convert:
   1903       // (X << S) <=s C is equiv to X <=s (C >> S) for all C
   1904       // (X << S) <s (C + 1) is equiv to X <s (C >> S) + 1 if C <s SMAX
   1905       // (X << S) <s C is equiv to X <s ((C - 1) >> S) + 1 if C >s SMIN
   1906       assert(!C.isMinSignedValue() && "Unexpected icmp slt");
   1907       APInt ShiftedC = (C - 1).ashr(*ShiftAmt) + 1;
   1908       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
   1909     }
   1910     // If this is a signed comparison to 0 and the shift is sign preserving,
   1911     // use the shift LHS operand instead; isSignTest may change 'Pred', so only
   1912     // do that if we're sure to not continue on in this function.
   1913     if (isSignTest(Pred, C))
   1914       return new ICmpInst(Pred, X, Constant::getNullValue(ShType));
   1915   }
   1916 
   1917   // NUW guarantees that we are only shifting out zero bits from the high bits,
   1918   // so we can LSHR the compare constant without needing a mask and eliminate
   1919   // the shift.
   1920   if (Shl->hasNoUnsignedWrap()) {
   1921     if (Pred == ICmpInst::ICMP_UGT) {
   1922       // icmp Pred (shl nuw X, ShiftAmt), C --> icmp Pred X, (C >>u ShiftAmt)
   1923       APInt ShiftedC = C.lshr(*ShiftAmt);
   1924       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
   1925     }
   1926     if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
   1927         C.lshr(*ShiftAmt).shl(*ShiftAmt) == C) {
   1928       APInt ShiftedC = C.lshr(*ShiftAmt);
   1929       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
   1930     }
   1931     if (Pred == ICmpInst::ICMP_ULT) {
   1932       // ULE is the same as above, but ULE is canonicalized to ULT, so convert:
   1933       // (X << S) <=u C is equiv to X <=u (C >> S) for all C
   1934       // (X << S) <u (C + 1) is equiv to X <u (C >> S) + 1 if C <u ~0u
   1935       // (X << S) <u C is equiv to X <u ((C - 1) >> S) + 1 if C >u 0
   1936       assert(C.ugt(0) && "ult 0 should have been eliminated");
   1937       APInt ShiftedC = (C - 1).lshr(*ShiftAmt) + 1;
   1938       return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
   1939     }
   1940   }
   1941 
   1942   if (Cmp.isEquality() && Shl->hasOneUse()) {
   1943     // Strength-reduce the shift into an 'and'.
   1944     Constant *Mask = ConstantInt::get(
   1945         ShType,
   1946         APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue()));
   1947     Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask");
   1948     Constant *LShrC = ConstantInt::get(ShType, C.lshr(*ShiftAmt));
   1949     return new ICmpInst(Pred, And, LShrC);
   1950   }
   1951 
   1952   // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
   1953   bool TrueIfSigned = false;
   1954   if (Shl->hasOneUse() && isSignBitCheck(Pred, C, TrueIfSigned)) {
   1955     // (X << 31) <s 0  --> (X & 1) != 0
   1956     Constant *Mask = ConstantInt::get(
   1957         ShType,
   1958         APInt::getOneBitSet(TypeBits, TypeBits - ShiftAmt->getZExtValue() - 1));
   1959     Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask");
   1960     return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
   1961                         And, Constant::getNullValue(ShType));
   1962   }
   1963 
   1964   // Transform (icmp pred iM (shl iM %v, N), C)
   1965   // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (C>>N))
   1966   // Transform the shl to a trunc if (trunc (C>>N)) has no loss and M-N.
   1967   // This enables us to get rid of the shift in favor of a trunc that may be
   1968   // free on the target. It has the additional benefit of comparing to a
   1969   // smaller constant that may be more target-friendly.
   1970   unsigned Amt = ShiftAmt->getLimitedValue(TypeBits - 1);
   1971   if (Shl->hasOneUse() && Amt != 0 && C.countTrailingZeros() >= Amt &&
   1972       DL.isLegalInteger(TypeBits - Amt)) {
   1973     Type *TruncTy = IntegerType::get(Cmp.getContext(), TypeBits - Amt);
   1974     if (ShType->isVectorTy())
   1975       TruncTy = VectorType::get(TruncTy, ShType->getVectorNumElements());
   1976     Constant *NewC =
   1977         ConstantInt::get(TruncTy, C.ashr(*ShiftAmt).trunc(TypeBits - Amt));
   1978     return new ICmpInst(Pred, Builder.CreateTrunc(X, TruncTy), NewC);
   1979   }
   1980 
   1981   return nullptr;
   1982 }
   1983 
   1984 /// Fold icmp ({al}shr X, Y), C.
   1985 Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp,
   1986                                                BinaryOperator *Shr,
   1987                                                const APInt &C) {
   1988   // An exact shr only shifts out zero bits, so:
   1989   // icmp eq/ne (shr X, Y), 0 --> icmp eq/ne X, 0
   1990   Value *X = Shr->getOperand(0);
   1991   CmpInst::Predicate Pred = Cmp.getPredicate();
   1992   if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() &&
   1993       C.isNullValue())
   1994     return new ICmpInst(Pred, X, Cmp.getOperand(1));
   1995 
   1996   const APInt *ShiftVal;
   1997   if (Cmp.isEquality() && match(Shr->getOperand(0), m_APInt(ShiftVal)))
   1998     return foldICmpShrConstConst(Cmp, Shr->getOperand(1), C, *ShiftVal);
   1999 
   2000   const APInt *ShiftAmt;
   2001   if (!match(Shr->getOperand(1), m_APInt(ShiftAmt)))
   2002     return nullptr;
   2003 
   2004   // Check that the shift amount is in range. If not, don't perform undefined
   2005   // shifts. When the shift is visited it will be simplified.
   2006   unsigned TypeBits = C.getBitWidth();
   2007   unsigned ShAmtVal = ShiftAmt->getLimitedValue(TypeBits);
   2008   if (ShAmtVal >= TypeBits || ShAmtVal == 0)
   2009     return nullptr;
   2010 
   2011   bool IsAShr = Shr->getOpcode() == Instruction::AShr;
   2012   bool IsExact = Shr->isExact();
   2013   Type *ShrTy = Shr->getType();
   2014   // TODO: If we could guarantee that InstSimplify would handle all of the
   2015   // constant-value-based preconditions in the folds below, then we could assert
   2016   // those conditions rather than checking them. This is difficult because of
   2017   // undef/poison (PR34838).
   2018   if (IsAShr) {
   2019     if (Pred == CmpInst::ICMP_SLT || (Pred == CmpInst::ICMP_SGT && IsExact)) {
   2020       // icmp slt (ashr X, ShAmtC), C --> icmp slt X, (C << ShAmtC)
   2021       // icmp sgt (ashr exact X, ShAmtC), C --> icmp sgt X, (C << ShAmtC)
   2022       APInt ShiftedC = C.shl(ShAmtVal);
   2023       if (ShiftedC.ashr(ShAmtVal) == C)
   2024         return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
   2025     }
   2026     if (Pred == CmpInst::ICMP_SGT) {
   2027       // icmp sgt (ashr X, ShAmtC), C --> icmp sgt X, ((C + 1) << ShAmtC) - 1
   2028       APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
   2029       if (!C.isMaxSignedValue() && !(C + 1).shl(ShAmtVal).isMinSignedValue() &&
   2030           (ShiftedC + 1).ashr(ShAmtVal) == (C + 1))
   2031         return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
   2032     }
   2033   } else {
   2034     if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) {
   2035       // icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC)
   2036       // icmp ugt (lshr exact X, ShAmtC), C --> icmp ugt X, (C << ShAmtC)
   2037       APInt ShiftedC = C.shl(ShAmtVal);
   2038       if (ShiftedC.lshr(ShAmtVal) == C)
   2039         return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
   2040     }
   2041     if (Pred == CmpInst::ICMP_UGT) {
   2042       // icmp ugt (lshr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1
   2043       APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
   2044       if ((ShiftedC + 1).lshr(ShAmtVal) == (C + 1))
   2045         return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
   2046     }
   2047   }
   2048 
   2049   if (!Cmp.isEquality())
   2050     return nullptr;
   2051 
   2052   // Handle equality comparisons of shift-by-constant.
   2053 
   2054   // If the comparison constant changes with the shift, the comparison cannot
   2055   // succeed (bits of the comparison constant cannot match the shifted value).
   2056   // This should be known by InstSimplify and already be folded to true/false.
   2057   assert(((IsAShr && C.shl(ShAmtVal).ashr(ShAmtVal) == C) ||
   2058           (!IsAShr && C.shl(ShAmtVal).lshr(ShAmtVal) == C)) &&
   2059          "Expected icmp+shr simplify did not occur.");
   2060 
   2061   // If the bits shifted out are known zero, compare the unshifted value:
   2062   //  (X & 4) >> 1 == 2  --> (X & 4) == 4.
   2063   if (Shr->isExact())
   2064     return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, C << ShAmtVal));
   2065 
   2066   if (Shr->hasOneUse()) {
   2067     // Canonicalize the shift into an 'and':
   2068     // icmp eq/ne (shr X, ShAmt), C --> icmp eq/ne (and X, HiMask), (C << ShAmt)
   2069     APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
   2070     Constant *Mask = ConstantInt::get(ShrTy, Val);
   2071     Value *And = Builder.CreateAnd(X, Mask, Shr->getName() + ".mask");
   2072     return new ICmpInst(Pred, And, ConstantInt::get(ShrTy, C << ShAmtVal));
   2073   }
   2074 
   2075   return nullptr;
   2076 }
   2077 
   2078 /// Fold icmp (udiv X, Y), C.
   2079 Instruction *InstCombiner::foldICmpUDivConstant(ICmpInst &Cmp,
   2080                                                 BinaryOperator *UDiv,
   2081                                                 const APInt &C) {
   2082   const APInt *C2;
   2083   if (!match(UDiv->getOperand(0), m_APInt(C2)))
   2084     return nullptr;
   2085 
   2086   assert(*C2 != 0 && "udiv 0, X should have been simplified already.");
   2087 
   2088   // (icmp ugt (udiv C2, Y), C) -> (icmp ule Y, C2/(C+1))
   2089   Value *Y = UDiv->getOperand(1);
   2090   if (Cmp.getPredicate() == ICmpInst::ICMP_UGT) {
   2091     assert(!C.isMaxValue() &&
   2092            "icmp ugt X, UINT_MAX should have been simplified already.");
   2093     return new ICmpInst(ICmpInst::ICMP_ULE, Y,
   2094                         ConstantInt::get(Y->getType(), C2->udiv(C + 1)));
   2095   }
   2096 
   2097   // (icmp ult (udiv C2, Y), C) -> (icmp ugt Y, C2/C)
   2098   if (Cmp.getPredicate() == ICmpInst::ICMP_ULT) {
   2099     assert(C != 0 && "icmp ult X, 0 should have been simplified already.");
   2100     return new ICmpInst(ICmpInst::ICMP_UGT, Y,
   2101                         ConstantInt::get(Y->getType(), C2->udiv(C)));
   2102   }
   2103 
   2104   return nullptr;
   2105 }
   2106 
   2107 /// Fold icmp ({su}div X, Y), C.
   2108 Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
   2109                                                BinaryOperator *Div,
   2110                                                const APInt &C) {
   2111   // Fold: icmp pred ([us]div X, C2), C -> range test
   2112   // Fold this div into the comparison, producing a range check.
   2113   // Determine, based on the divide type, what the range is being
   2114   // checked.  If there is an overflow on the low or high side, remember
   2115   // it, otherwise compute the range [low, hi) bounding the new value.
   2116   // See: InsertRangeTest above for the kinds of replacements possible.
   2117   const APInt *C2;
   2118   if (!match(Div->getOperand(1), m_APInt(C2)))
   2119     return nullptr;
   2120 
   2121   // FIXME: If the operand types don't match the type of the divide
   2122   // then don't attempt this transform. The code below doesn't have the
   2123   // logic to deal with a signed divide and an unsigned compare (and
   2124   // vice versa). This is because (x /s C2) <s C  produces different
   2125   // results than (x /s C2) <u C or (x /u C2) <s C or even
   2126   // (x /u C2) <u C.  Simply casting the operands and result won't
   2127   // work. :(  The if statement below tests that condition and bails
   2128   // if it finds it.
   2129   bool DivIsSigned = Div->getOpcode() == Instruction::SDiv;
   2130   if (!Cmp.isEquality() && DivIsSigned != Cmp.isSigned())
   2131     return nullptr;
   2132 
   2133   // The ProdOV computation fails on divide by 0 and divide by -1. Cases with
   2134   // INT_MIN will also fail if the divisor is 1. Although folds of all these
   2135   // division-by-constant cases should be present, we can not assert that they
   2136   // have happened before we reach this icmp instruction.
   2137   if (C2->isNullValue() || C2->isOneValue() ||
   2138       (DivIsSigned && C2->isAllOnesValue()))
   2139     return nullptr;
   2140 
   2141   // Compute Prod = C * C2. We are essentially solving an equation of
   2142   // form X / C2 = C. We solve for X by multiplying C2 and C.
   2143   // By solving for X, we can turn this into a range check instead of computing
   2144   // a divide.
   2145   APInt Prod = C * *C2;
   2146 
   2147   // Determine if the product overflows by seeing if the product is not equal to
   2148   // the divide. Make sure we do the same kind of divide as in the LHS
   2149   // instruction that we're folding.
   2150   bool ProdOV = (DivIsSigned ? Prod.sdiv(*C2) : Prod.udiv(*C2)) != C;
   2151 
   2152   ICmpInst::Predicate Pred = Cmp.getPredicate();
   2153 
   2154   // If the division is known to be exact, then there is no remainder from the
   2155   // divide, so the covered range size is unit, otherwise it is the divisor.
   2156   APInt RangeSize = Div->isExact() ? APInt(C2->getBitWidth(), 1) : *C2;
   2157 
   2158   // Figure out the interval that is being checked.  For example, a comparison
   2159   // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
   2160   // Compute this interval based on the constants involved and the signedness of
   2161   // the compare/divide.  This computes a half-open interval, keeping track of
   2162   // whether either value in the interval overflows.  After analysis each
   2163   // overflow variable is set to 0 if it's corresponding bound variable is valid
   2164   // -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
   2165   int LoOverflow = 0, HiOverflow = 0;
   2166   APInt LoBound, HiBound;
   2167 
   2168   if (!DivIsSigned) {  // udiv
   2169     // e.g. X/5 op 3  --> [15, 20)
   2170     LoBound = Prod;
   2171     HiOverflow = LoOverflow = ProdOV;
   2172     if (!HiOverflow) {
   2173       // If this is not an exact divide, then many values in the range collapse
   2174       // to the same result value.
   2175       HiOverflow = addWithOverflow(HiBound, LoBound, RangeSize, false);
   2176     }
   2177   } else if (C2->isStrictlyPositive()) { // Divisor is > 0.
   2178     if (C.isNullValue()) {       // (X / pos) op 0
   2179       // Can't overflow.  e.g.  X/2 op 0 --> [-1, 2)
   2180       LoBound = -(RangeSize - 1);
   2181       HiBound = RangeSize;
   2182     } else if (C.isStrictlyPositive()) {   // (X / pos) op pos
   2183       LoBound = Prod;     // e.g.   X/5 op 3 --> [15, 20)
   2184       HiOverflow = LoOverflow = ProdOV;
   2185       if (!HiOverflow)
   2186         HiOverflow = addWithOverflow(HiBound, Prod, RangeSize, true);
   2187     } else {                       // (X / pos) op neg
   2188       // e.g. X/5 op -3  --> [-15-4, -15+1) --> [-19, -14)
   2189       HiBound = Prod + 1;
   2190       LoOverflow = HiOverflow = ProdOV ? -1 : 0;
   2191       if (!LoOverflow) {
   2192         APInt DivNeg = -RangeSize;
   2193         LoOverflow = addWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
   2194       }
   2195     }
   2196   } else if (C2->isNegative()) { // Divisor is < 0.
   2197     if (Div->isExact())
   2198       RangeSize.negate();
   2199     if (C.isNullValue()) { // (X / neg) op 0
   2200       // e.g. X/-5 op 0  --> [-4, 5)
   2201       LoBound = RangeSize + 1;
   2202       HiBound = -RangeSize;
   2203       if (HiBound == *C2) {        // -INTMIN = INTMIN
   2204         HiOverflow = 1;            // [INTMIN+1, overflow)
   2205         HiBound = APInt();         // e.g. X/INTMIN = 0 --> X > INTMIN
   2206       }
   2207     } else if (C.isStrictlyPositive()) {   // (X / neg) op pos
   2208       // e.g. X/-5 op 3  --> [-19, -14)
   2209       HiBound = Prod + 1;
   2210       HiOverflow = LoOverflow = ProdOV ? -1 : 0;
   2211       if (!LoOverflow)
   2212         LoOverflow = addWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
   2213     } else {                       // (X / neg) op neg
   2214       LoBound = Prod;       // e.g. X/-5 op -3  --> [15, 20)
   2215       LoOverflow = HiOverflow = ProdOV;
   2216       if (!HiOverflow)
   2217         HiOverflow = subWithOverflow(HiBound, Prod, RangeSize, true);
   2218     }
   2219 
   2220     // Dividing by a negative swaps the condition.  LT <-> GT
   2221     Pred = ICmpInst::getSwappedPredicate(Pred);
   2222   }
   2223 
   2224   Value *X = Div->getOperand(0);
   2225   switch (Pred) {
   2226     default: llvm_unreachable("Unhandled icmp opcode!");
   2227     case ICmpInst::ICMP_EQ:
   2228       if (LoOverflow && HiOverflow)
   2229         return replaceInstUsesWith(Cmp, Builder.getFalse());
   2230       if (HiOverflow)
   2231         return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
   2232                             ICmpInst::ICMP_UGE, X,
   2233                             ConstantInt::get(Div->getType(), LoBound));
   2234       if (LoOverflow)
   2235         return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
   2236                             ICmpInst::ICMP_ULT, X,
   2237                             ConstantInt::get(Div->getType(), HiBound));
   2238       return replaceInstUsesWith(
   2239           Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, true));
   2240     case ICmpInst::ICMP_NE:
   2241       if (LoOverflow && HiOverflow)
   2242         return replaceInstUsesWith(Cmp, Builder.getTrue());
   2243       if (HiOverflow)
   2244         return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
   2245                             ICmpInst::ICMP_ULT, X,
   2246                             ConstantInt::get(Div->getType(), LoBound));
   2247       if (LoOverflow)
   2248         return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
   2249                             ICmpInst::ICMP_UGE, X,
   2250                             ConstantInt::get(Div->getType(), HiBound));
   2251       return replaceInstUsesWith(Cmp,
   2252                                  insertRangeTest(X, LoBound, HiBound,
   2253                                                  DivIsSigned, false));
   2254     case ICmpInst::ICMP_ULT:
   2255     case ICmpInst::ICMP_SLT:
   2256       if (LoOverflow == +1)   // Low bound is greater than input range.
   2257         return replaceInstUsesWith(Cmp, Builder.getTrue());
   2258       if (LoOverflow == -1)   // Low bound is less than input range.
   2259         return replaceInstUsesWith(Cmp, Builder.getFalse());
   2260       return new ICmpInst(Pred, X, ConstantInt::get(Div->getType(), LoBound));
   2261     case ICmpInst::ICMP_UGT:
   2262     case ICmpInst::ICMP_SGT:
   2263       if (HiOverflow == +1)       // High bound greater than input range.
   2264         return replaceInstUsesWith(Cmp, Builder.getFalse());
   2265       if (HiOverflow == -1)       // High bound less than input range.
   2266         return replaceInstUsesWith(Cmp, Builder.getTrue());
   2267       if (Pred == ICmpInst::ICMP_UGT)
   2268         return new ICmpInst(ICmpInst::ICMP_UGE, X,
   2269                             ConstantInt::get(Div->getType(), HiBound));
   2270       return new ICmpInst(ICmpInst::ICMP_SGE, X,
   2271                           ConstantInt::get(Div->getType(), HiBound));
   2272   }
   2273 
   2274   return nullptr;
   2275 }
   2276 
   2277 /// Fold icmp (sub X, Y), C.
   2278 Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp,
   2279                                                BinaryOperator *Sub,
   2280                                                const APInt &C) {
   2281   Value *X = Sub->getOperand(0), *Y = Sub->getOperand(1);
   2282   ICmpInst::Predicate Pred = Cmp.getPredicate();
   2283 
   2284   // The following transforms are only worth it if the only user of the subtract
   2285   // is the icmp.
   2286   if (!Sub->hasOneUse())
   2287     return nullptr;
   2288 
   2289   if (Sub->hasNoSignedWrap()) {
   2290     // (icmp sgt (sub nsw X, Y), -1) -> (icmp sge X, Y)
   2291     if (Pred == ICmpInst::ICMP_SGT && C.isAllOnesValue())
   2292       return new ICmpInst(ICmpInst::ICMP_SGE, X, Y);
   2293 
   2294     // (icmp sgt (sub nsw X, Y), 0) -> (icmp sgt X, Y)
   2295     if (Pred == ICmpInst::ICMP_SGT && C.isNullValue())
   2296       return new ICmpInst(ICmpInst::ICMP_SGT, X, Y);
   2297 
   2298     // (icmp slt (sub nsw X, Y), 0) -> (icmp slt X, Y)
   2299     if (Pred == ICmpInst::ICMP_SLT && C.isNullValue())
   2300       return new ICmpInst(ICmpInst::ICMP_SLT, X, Y);
   2301 
   2302     // (icmp slt (sub nsw X, Y), 1) -> (icmp sle X, Y)
   2303     if (Pred == ICmpInst::ICMP_SLT && C.isOneValue())
   2304       return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
   2305   }
   2306 
   2307   const APInt *C2;
   2308   if (!match(X, m_APInt(C2)))
   2309     return nullptr;
   2310 
   2311   // C2 - Y <u C -> (Y | (C - 1)) == C2
   2312   //   iff (C2 & (C - 1)) == C - 1 and C is a power of 2
   2313   if (Pred == ICmpInst::ICMP_ULT && C.isPowerOf2() &&
   2314       (*C2 & (C - 1)) == (C - 1))
   2315     return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateOr(Y, C - 1), X);
   2316 
   2317   // C2 - Y >u C -> (Y | C) != C2
   2318   //   iff C2 & C == C and C + 1 is a power of 2
   2319   if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == C)
   2320     return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateOr(Y, C), X);
   2321 
   2322   return nullptr;
   2323 }
   2324 
   2325 /// Fold icmp (add X, Y), C.
   2326 Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
   2327                                                BinaryOperator *Add,
   2328                                                const APInt &C) {
   2329   Value *Y = Add->getOperand(1);
   2330   const APInt *C2;
   2331   if (Cmp.isEquality() || !match(Y, m_APInt(C2)))
   2332     return nullptr;
   2333 
   2334   // Fold icmp pred (add X, C2), C.
   2335   Value *X = Add->getOperand(0);
   2336   Type *Ty = Add->getType();
   2337   CmpInst::Predicate Pred = Cmp.getPredicate();
   2338 
   2339   // If the add does not wrap, we can always adjust the compare by subtracting
   2340   // the constants. Equality comparisons are handled elsewhere. SGE/SLE are
   2341   // canonicalized to SGT/SLT.
   2342   if (Add->hasNoSignedWrap() &&
   2343       (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT)) {
   2344     bool Overflow;
   2345     APInt NewC = C.ssub_ov(*C2, Overflow);
   2346     // If there is overflow, the result must be true or false.
   2347     // TODO: Can we assert there is no overflow because InstSimplify always
   2348     // handles those cases?
   2349     if (!Overflow)
   2350       // icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2)
   2351       return new ICmpInst(Pred, X, ConstantInt::get(Ty, NewC));
   2352   }
   2353 
   2354   auto CR = ConstantRange::makeExactICmpRegion(Pred, C).subtract(*C2);
   2355   const APInt &Upper = CR.getUpper();
   2356   const APInt &Lower = CR.getLower();
   2357   if (Cmp.isSigned()) {
   2358     if (Lower.isSignMask())
   2359       return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, Upper));
   2360     if (Upper.isSignMask())
   2361       return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, Lower));
   2362   } else {
   2363     if (Lower.isMinValue())
   2364       return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantInt::get(Ty, Upper));
   2365     if (Upper.isMinValue())
   2366       return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, Lower));
   2367   }
   2368 
   2369   if (!Add->hasOneUse())
   2370     return nullptr;
   2371 
   2372   // X+C <u C2 -> (X & -C2) == C
   2373   //   iff C & (C2-1) == 0
   2374   //       C2 is a power of 2
   2375   if (Pred == ICmpInst::ICMP_ULT && C.isPowerOf2() && (*C2 & (C - 1)) == 0)
   2376     return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateAnd(X, -C),
   2377                         ConstantExpr::getNeg(cast<Constant>(Y)));
   2378 
   2379   // X+C >u C2 -> (X & ~C2) != C
   2380   //   iff C & C2 == 0
   2381   //       C2+1 is a power of 2
   2382   if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == 0)
   2383     return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateAnd(X, ~C),
   2384                         ConstantExpr::getNeg(cast<Constant>(Y)));
   2385 
   2386   return nullptr;
   2387 }
   2388 
   2389 bool InstCombiner::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS,
   2390                                            Value *&RHS, ConstantInt *&Less,
   2391                                            ConstantInt *&Equal,
   2392                                            ConstantInt *&Greater) {
   2393   // TODO: Generalize this to work with other comparison idioms or ensure
   2394   // they get canonicalized into this form.
   2395 
   2396   // select i1 (a == b), i32 Equal, i32 (select i1 (a < b), i32 Less, i32
   2397   // Greater), where Equal, Less and Greater are placeholders for any three
   2398   // constants.
   2399   ICmpInst::Predicate PredA, PredB;
   2400   if (match(SI->getTrueValue(), m_ConstantInt(Equal)) &&
   2401       match(SI->getCondition(), m_ICmp(PredA, m_Value(LHS), m_Value(RHS))) &&
   2402       PredA == ICmpInst::ICMP_EQ &&
   2403       match(SI->getFalseValue(),
   2404             m_Select(m_ICmp(PredB, m_Specific(LHS), m_Specific(RHS)),
   2405                      m_ConstantInt(Less), m_ConstantInt(Greater))) &&
   2406       PredB == ICmpInst::ICMP_SLT) {
   2407     return true;
   2408   }
   2409   return false;
   2410 }
   2411 
   2412 Instruction *InstCombiner::foldICmpSelectConstant(ICmpInst &Cmp,
   2413                                                   SelectInst *Select,
   2414                                                   ConstantInt *C) {
   2415 
   2416   assert(C && "Cmp RHS should be a constant int!");
   2417   // If we're testing a constant value against the result of a three way
   2418   // comparison, the result can be expressed directly in terms of the
   2419   // original values being compared.  Note: We could possibly be more
   2420   // aggressive here and remove the hasOneUse test. The original select is
   2421   // really likely to simplify or sink when we remove a test of the result.
   2422   Value *OrigLHS, *OrigRHS;
   2423   ConstantInt *C1LessThan, *C2Equal, *C3GreaterThan;
   2424   if (Cmp.hasOneUse() &&
   2425       matchThreeWayIntCompare(Select, OrigLHS, OrigRHS, C1LessThan, C2Equal,
   2426                               C3GreaterThan)) {
   2427     assert(C1LessThan && C2Equal && C3GreaterThan);
   2428 
   2429     bool TrueWhenLessThan =
   2430         ConstantExpr::getCompare(Cmp.getPredicate(), C1LessThan, C)
   2431             ->isAllOnesValue();
   2432     bool TrueWhenEqual =
   2433         ConstantExpr::getCompare(Cmp.getPredicate(), C2Equal, C)
   2434             ->isAllOnesValue();
   2435     bool TrueWhenGreaterThan =
   2436         ConstantExpr::getCompare(Cmp.getPredicate(), C3GreaterThan, C)
   2437             ->isAllOnesValue();
   2438 
   2439     // This generates the new instruction that will replace the original Cmp
   2440     // Instruction. Instead of enumerating the various combinations when
   2441     // TrueWhenLessThan, TrueWhenEqual and TrueWhenGreaterThan are true versus
   2442     // false, we rely on chaining of ORs and future passes of InstCombine to
   2443     // simplify the OR further (i.e. a s< b || a == b becomes a s<= b).
   2444 
   2445     // When none of the three constants satisfy the predicate for the RHS (C),
   2446     // the entire original Cmp can be simplified to a false.
   2447     Value *Cond = Builder.getFalse();
   2448     if (TrueWhenLessThan)
   2449       Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SLT, OrigLHS, OrigRHS));
   2450     if (TrueWhenEqual)
   2451       Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_EQ, OrigLHS, OrigRHS));
   2452     if (TrueWhenGreaterThan)
   2453       Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SGT, OrigLHS, OrigRHS));
   2454 
   2455     return replaceInstUsesWith(Cmp, Cond);
   2456   }
   2457   return nullptr;
   2458 }
   2459 
   2460 Instruction *InstCombiner::foldICmpBitCastConstant(ICmpInst &Cmp,
   2461                                                    BitCastInst *Bitcast,
   2462                                                    const APInt &C) {
   2463   // Folding: icmp <pred> iN X, C
   2464   //  where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC)) to iN
   2465   //    and C is a splat of a K-bit pattern
   2466   //    and SC is a constant vector = <C', C', C', ..., C'>
   2467   // Into:
   2468   //   %E = extractelement <M x iK> %vec, i32 C'
   2469   //   icmp <pred> iK %E, trunc(C)
   2470   if (!Bitcast->getType()->isIntegerTy() ||
   2471       !Bitcast->getSrcTy()->isIntOrIntVectorTy())
   2472     return nullptr;
   2473 
   2474   Value *BCIOp = Bitcast->getOperand(0);
   2475   Value *Vec = nullptr;     // 1st vector arg of the shufflevector
   2476   Constant *Mask = nullptr; // Mask arg of the shufflevector
   2477   if (match(BCIOp,
   2478             m_ShuffleVector(m_Value(Vec), m_Undef(), m_Constant(Mask)))) {
   2479     // Check whether every element of Mask is the same constant
   2480     if (auto *Elem = dyn_cast_or_null<ConstantInt>(Mask->getSplatValue())) {
   2481       auto *VecTy = cast<VectorType>(BCIOp->getType());
   2482       auto *EltTy = cast<IntegerType>(VecTy->getElementType());
   2483       auto Pred = Cmp.getPredicate();
   2484       if (C.isSplat(EltTy->getBitWidth())) {
   2485         // Fold the icmp based on the value of C
   2486         // If C is M copies of an iK sized bit pattern,
   2487         // then:
   2488         //   =>  %E = extractelement <N x iK> %vec, i32 Elem
   2489         //       icmp <pred> iK %SplatVal, <pattern>
   2490         Value *Extract = Builder.CreateExtractElement(Vec, Elem);
   2491         Value *NewC = ConstantInt::get(EltTy, C.trunc(EltTy->getBitWidth()));
   2492         return new ICmpInst(Pred, Extract, NewC);
   2493       }
   2494     }
   2495   }
   2496   return nullptr;
   2497 }
   2498 
   2499 /// Try to fold integer comparisons with a constant operand: icmp Pred X, C
   2500 /// where X is some kind of instruction.
   2501 Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) {
   2502   const APInt *C;
   2503   if (!match(Cmp.getOperand(1), m_APInt(C)))
   2504     return nullptr;
   2505 
   2506   if (auto *BO = dyn_cast<BinaryOperator>(Cmp.getOperand(0))) {
   2507     switch (BO->getOpcode()) {
   2508     case Instruction::Xor:
   2509       if (Instruction *I = foldICmpXorConstant(Cmp, BO, *C))
   2510         return I;
   2511       break;
   2512     case Instruction::And:
   2513       if (Instruction *I = foldICmpAndConstant(Cmp, BO, *C))
   2514         return I;
   2515       break;
   2516     case Instruction::Or:
   2517       if (Instruction *I = foldICmpOrConstant(Cmp, BO, *C))
   2518         return I;
   2519       break;
   2520     case Instruction::Mul:
   2521       if (Instruction *I = foldICmpMulConstant(Cmp, BO, *C))
   2522         return I;
   2523       break;
   2524     case Instruction::Shl:
   2525       if (Instruction *I = foldICmpShlConstant(Cmp, BO, *C))
   2526         return I;
   2527       break;
   2528     case Instruction::LShr:
   2529     case Instruction::AShr:
   2530       if (Instruction *I = foldICmpShrConstant(Cmp, BO, *C))
   2531         return I;
   2532       break;
   2533     case Instruction::UDiv:
   2534       if (Instruction *I = foldICmpUDivConstant(Cmp, BO, *C))
   2535         return I;
   2536       LLVM_FALLTHROUGH;
   2537     case Instruction::SDiv:
   2538       if (Instruction *I = foldICmpDivConstant(Cmp, BO, *C))
   2539         return I;
   2540       break;
   2541     case Instruction::Sub:
   2542       if (Instruction *I = foldICmpSubConstant(Cmp, BO, *C))
   2543         return I;
   2544       break;
   2545     case Instruction::Add:
   2546       if (Instruction *I = foldICmpAddConstant(Cmp, BO, *C))
   2547         return I;
   2548       break;
   2549     default:
   2550       break;
   2551     }
   2552     // TODO: These folds could be refactored to be part of the above calls.
   2553     if (Instruction *I = foldICmpBinOpEqualityWithConstant(Cmp, BO, *C))
   2554       return I;
   2555   }
   2556 
   2557   // Match against CmpInst LHS being instructions other than binary operators.
   2558 
   2559   if (auto *SI = dyn_cast<SelectInst>(Cmp.getOperand(0))) {
   2560     // For now, we only support constant integers while folding the
   2561     // ICMP(SELECT)) pattern. We can extend this to support vector of integers
   2562     // similar to the cases handled by binary ops above.
   2563     if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(Cmp.getOperand(1)))
   2564       if (Instruction *I = foldICmpSelectConstant(Cmp, SI, ConstRHS))
   2565         return I;
   2566   }
   2567 
   2568   if (auto *TI = dyn_cast<TruncInst>(Cmp.getOperand(0))) {
   2569     if (Instruction *I = foldICmpTruncConstant(Cmp, TI, *C))
   2570       return I;
   2571   }
   2572 
   2573   if (auto *BCI = dyn_cast<BitCastInst>(Cmp.getOperand(0))) {
   2574     if (Instruction *I = foldICmpBitCastConstant(Cmp, BCI, *C))
   2575       return I;
   2576   }
   2577 
   2578   if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, *C))
   2579     return I;
   2580 
   2581   return nullptr;
   2582 }
   2583 
   2584 /// Fold an icmp equality instruction with binary operator LHS and constant RHS:
   2585 /// icmp eq/ne BO, C.
   2586 Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
   2587                                                              BinaryOperator *BO,
   2588                                                              const APInt &C) {
   2589   // TODO: Some of these folds could work with arbitrary constants, but this
   2590   // function is limited to scalar and vector splat constants.
   2591   if (!Cmp.isEquality())
   2592     return nullptr;
   2593 
   2594   ICmpInst::Predicate Pred = Cmp.getPredicate();
   2595   bool isICMP_NE = Pred == ICmpInst::ICMP_NE;
   2596   Constant *RHS = cast<Constant>(Cmp.getOperand(1));
   2597   Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
   2598 
   2599   switch (BO->getOpcode()) {
   2600   case Instruction::SRem:
   2601     // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
   2602     if (C.isNullValue() && BO->hasOneUse()) {
   2603       const APInt *BOC;
   2604       if (match(BOp1, m_APInt(BOC)) && BOC->sgt(1) && BOC->isPowerOf2()) {
   2605         Value *NewRem = Builder.CreateURem(BOp0, BOp1, BO->getName());
   2606         return new ICmpInst(Pred, NewRem,
   2607                             Constant::getNullValue(BO->getType()));
   2608       }
   2609     }
   2610     break;
   2611   case Instruction::Add: {
   2612     // Replace ((add A, B) != C) with (A != C-B) if B & C are constants.
   2613     const APInt *BOC;
   2614     if (match(BOp1, m_APInt(BOC))) {
   2615       if (BO->hasOneUse()) {
   2616         Constant *SubC = ConstantExpr::getSub(RHS, cast<Constant>(BOp1));
   2617         return new ICmpInst(Pred, BOp0, SubC);
   2618       }
   2619     } else if (C.isNullValue()) {
   2620       // Replace ((add A, B) != 0) with (A != -B) if A or B is
   2621       // efficiently invertible, or if the add has just this one use.
   2622       if (Value *NegVal = dyn_castNegVal(BOp1))
   2623         return new ICmpInst(Pred, BOp0, NegVal);
   2624       if (Value *NegVal = dyn_castNegVal(BOp0))
   2625         return new ICmpInst(Pred, NegVal, BOp1);
   2626       if (BO->hasOneUse()) {
   2627         Value *Neg = Builder.CreateNeg(BOp1);
   2628         Neg->takeName(BO);
   2629         return new ICmpInst(Pred, BOp0, Neg);
   2630       }
   2631     }
   2632     break;
   2633   }
   2634   case Instruction::Xor:
   2635     if (BO->hasOneUse()) {
   2636       if (Constant *BOC = dyn_cast<Constant>(BOp1)) {
   2637         // For the xor case, we can xor two constants together, eliminating
   2638         // the explicit xor.
   2639         return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC));
   2640       } else if (C.isNullValue()) {
   2641         // Replace ((xor A, B) != 0) with (A != B)
   2642         return new ICmpInst(Pred, BOp0, BOp1);
   2643       }
   2644     }
   2645     break;
   2646   case Instruction::Sub:
   2647     if (BO->hasOneUse()) {
   2648       const APInt *BOC;
   2649       if (match(BOp0, m_APInt(BOC))) {
   2650         // Replace ((sub BOC, B) != C) with (B != BOC-C).
   2651         Constant *SubC = ConstantExpr::getSub(cast<Constant>(BOp0), RHS);
   2652         return new ICmpInst(Pred, BOp1, SubC);
   2653       } else if (C.isNullValue()) {
   2654         // Replace ((sub A, B) != 0) with (A != B).
   2655         return new ICmpInst(Pred, BOp0, BOp1);
   2656       }
   2657     }
   2658     break;
   2659   case Instruction::Or: {
   2660     const APInt *BOC;
   2661     if (match(BOp1, m_APInt(BOC)) && BO->hasOneUse() && RHS->isAllOnesValue()) {
   2662       // Comparing if all bits outside of a constant mask are set?
   2663       // Replace (X | C) == -1 with (X & ~C) == ~C.
   2664       // This removes the -1 constant.
   2665       Constant *NotBOC = ConstantExpr::getNot(cast<Constant>(BOp1));
   2666       Value *And = Builder.CreateAnd(BOp0, NotBOC);
   2667       return new ICmpInst(Pred, And, NotBOC);
   2668     }
   2669     break;
   2670   }
   2671   case Instruction::And: {
   2672     const APInt *BOC;
   2673     if (match(BOp1, m_APInt(BOC))) {
   2674       // If we have ((X & C) == C), turn it into ((X & C) != 0).
   2675       if (C == *BOC && C.isPowerOf2())
   2676         return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
   2677                             BO, Constant::getNullValue(RHS->getType()));
   2678 
   2679       // Don't perform the following transforms if the AND has multiple uses
   2680       if (!BO->hasOneUse())
   2681         break;
   2682 
   2683       // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
   2684       if (BOC->isSignMask()) {
   2685         Constant *Zero = Constant::getNullValue(BOp0->getType());
   2686         auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
   2687         return new ICmpInst(NewPred, BOp0, Zero);
   2688       }
   2689 
   2690       // ((X & ~7) == 0) --> X < 8
   2691       if (C.isNullValue() && (~(*BOC) + 1).isPowerOf2()) {
   2692         Constant *NegBOC = ConstantExpr::getNeg(cast<Constant>(BOp1));
   2693         auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
   2694         return new ICmpInst(NewPred, BOp0, NegBOC);
   2695       }
   2696     }
   2697     break;
   2698   }
   2699   case Instruction::Mul:
   2700     if (C.isNullValue() && BO->hasNoSignedWrap()) {
   2701       const APInt *BOC;
   2702       if (match(BOp1, m_APInt(BOC)) && !BOC->isNullValue()) {
   2703         // The trivial case (mul X, 0) is handled by InstSimplify.
   2704         // General case : (mul X, C) != 0 iff X != 0
   2705         //                (mul X, C) == 0 iff X == 0
   2706         return new ICmpInst(Pred, BOp0, Constant::getNullValue(RHS->getType()));
   2707       }
   2708     }
   2709     break;
   2710   case Instruction::UDiv:
   2711     if (C.isNullValue()) {
   2712       // (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A)
   2713       auto NewPred = isICMP_NE ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT;
   2714       return new ICmpInst(NewPred, BOp1, BOp0);
   2715     }
   2716     break;
   2717   default:
   2718     break;
   2719   }
   2720   return nullptr;
   2721 }
   2722 
   2723 /// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
   2724 Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
   2725                                                          const APInt &C) {
   2726   IntrinsicInst *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0));
   2727   if (!II || !Cmp.isEquality())
   2728     return nullptr;
   2729 
   2730   // Handle icmp {eq|ne} <intrinsic>, Constant.
   2731   Type *Ty = II->getType();
   2732   switch (II->getIntrinsicID()) {
   2733   case Intrinsic::bswap:
   2734     Worklist.Add(II);
   2735     Cmp.setOperand(0, II->getArgOperand(0));
   2736     Cmp.setOperand(1, ConstantInt::get(Ty, C.byteSwap()));
   2737     return &Cmp;
   2738 
   2739   case Intrinsic::ctlz:
   2740   case Intrinsic::cttz:
   2741     // ctz(A) == bitwidth(A)  ->  A == 0 and likewise for !=
   2742     if (C == C.getBitWidth()) {
   2743       Worklist.Add(II);
   2744       Cmp.setOperand(0, II->getArgOperand(0));
   2745       Cmp.setOperand(1, ConstantInt::getNullValue(Ty));
   2746       return &Cmp;
   2747     }
   2748     break;
   2749 
   2750   case Intrinsic::ctpop: {
   2751     // popcount(A) == 0  ->  A == 0 and likewise for !=
   2752     // popcount(A) == bitwidth(A)  ->  A == -1 and likewise for !=
   2753     bool IsZero = C.isNullValue();
   2754     if (IsZero || C == C.getBitWidth()) {
   2755       Worklist.Add(II);
   2756       Cmp.setOperand(0, II->getArgOperand(0));
   2757       auto *NewOp =
   2758           IsZero ? Constant::getNullValue(Ty) : Constant::getAllOnesValue(Ty);
   2759       Cmp.setOperand(1, NewOp);
   2760       return &Cmp;
   2761     }
   2762     break;
   2763   }
   2764   default:
   2765     break;
   2766   }
   2767 
   2768   return nullptr;
   2769 }
   2770 
   2771 /// Handle icmp with constant (but not simple integer constant) RHS.
   2772 Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) {
   2773   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   2774   Constant *RHSC = dyn_cast<Constant>(Op1);
   2775   Instruction *LHSI = dyn_cast<Instruction>(Op0);
   2776   if (!RHSC || !LHSI)
   2777     return nullptr;
   2778 
   2779   switch (LHSI->getOpcode()) {
   2780   case Instruction::GetElementPtr:
   2781     // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null
   2782     if (RHSC->isNullValue() &&
   2783         cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices())
   2784       return new ICmpInst(
   2785           I.getPredicate(), LHSI->getOperand(0),
   2786           Constant::getNullValue(LHSI->getOperand(0)->getType()));
   2787     break;
   2788   case Instruction::PHI:
   2789     // Only fold icmp into the PHI if the phi and icmp are in the same
   2790     // block.  If in the same block, we're encouraging jump threading.  If
   2791     // not, we are just pessimizing the code by making an i1 phi.
   2792     if (LHSI->getParent() == I.getParent())
   2793       if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
   2794         return NV;
   2795     break;
   2796   case Instruction::Select: {
   2797     // If either operand of the select is a constant, we can fold the
   2798     // comparison into the select arms, which will cause one to be
   2799     // constant folded and the select turned into a bitwise or.
   2800     Value *Op1 = nullptr, *Op2 = nullptr;
   2801     ConstantInt *CI = nullptr;
   2802     if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
   2803       Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
   2804       CI = dyn_cast<ConstantInt>(Op1);
   2805     }
   2806     if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
   2807       Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
   2808       CI = dyn_cast<ConstantInt>(Op2);
   2809     }
   2810 
   2811     // We only want to perform this transformation if it will not lead to
   2812     // additional code. This is true if either both sides of the select
   2813     // fold to a constant (in which case the icmp is replaced with a select
   2814     // which will usually simplify) or this is the only user of the
   2815     // select (in which case we are trading a select+icmp for a simpler
   2816     // select+icmp) or all uses of the select can be replaced based on
   2817     // dominance information ("Global cases").
   2818     bool Transform = false;
   2819     if (Op1 && Op2)
   2820       Transform = true;
   2821     else if (Op1 || Op2) {
   2822       // Local case
   2823       if (LHSI->hasOneUse())
   2824         Transform = true;
   2825       // Global cases
   2826       else if (CI && !CI->isZero())
   2827         // When Op1 is constant try replacing select with second operand.
   2828         // Otherwise Op2 is constant and try replacing select with first
   2829         // operand.
   2830         Transform =
   2831             replacedSelectWithOperand(cast<SelectInst>(LHSI), &I, Op1 ? 2 : 1);
   2832     }
   2833     if (Transform) {
   2834       if (!Op1)
   2835         Op1 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC,
   2836                                  I.getName());
   2837       if (!Op2)
   2838         Op2 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(2), RHSC,
   2839                                  I.getName());
   2840       return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
   2841     }
   2842     break;
   2843   }
   2844   case Instruction::IntToPtr:
   2845     // icmp pred inttoptr(X), null -> icmp pred X, 0
   2846     if (RHSC->isNullValue() &&
   2847         DL.getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType())
   2848       return new ICmpInst(
   2849           I.getPredicate(), LHSI->getOperand(0),
   2850           Constant::getNullValue(LHSI->getOperand(0)->getType()));
   2851     break;
   2852 
   2853   case Instruction::Load:
   2854     // Try to optimize things like "A[i] > 4" to index computations.
   2855     if (GetElementPtrInst *GEP =
   2856             dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
   2857       if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
   2858         if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
   2859             !cast<LoadInst>(LHSI)->isVolatile())
   2860           if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
   2861             return Res;
   2862     }
   2863     break;
   2864   }
   2865 
   2866   return nullptr;
   2867 }
   2868 
   2869 /// Some comparisons can be simplified.
   2870 /// In this case, we are looking for comparisons that look like
   2871 /// a check for a lossy truncation.
   2872 /// Folds:
   2873 ///   x & (-1 >> y) SrcPred x    to    x DstPred (-1 >> y)
   2874 /// The Mask can be a constant, too.
   2875 /// For some predicates, the operands are commutative.
   2876 /// For others, x can only be on a specific side.
   2877 static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
   2878                                           InstCombiner::BuilderTy &Builder) {
   2879   ICmpInst::Predicate SrcPred;
   2880   Value *X, *M;
   2881   auto m_Mask = m_CombineOr(m_LShr(m_AllOnes(), m_Value()), m_LowBitMask());
   2882   if (!match(&I, m_c_ICmp(SrcPred,
   2883                           m_c_And(m_CombineAnd(m_Mask, m_Value(M)), m_Value(X)),
   2884                           m_Deferred(X))))
   2885     return nullptr;
   2886 
   2887   ICmpInst::Predicate DstPred;
   2888   switch (SrcPred) {
   2889   case ICmpInst::Predicate::ICMP_EQ:
   2890     //  x & (-1 >> y) == x    ->    x u<= (-1 >> y)
   2891     DstPred = ICmpInst::Predicate::ICMP_ULE;
   2892     break;
   2893   case ICmpInst::Predicate::ICMP_NE:
   2894     //  x & (-1 >> y) != x    ->    x u> (-1 >> y)
   2895     DstPred = ICmpInst::Predicate::ICMP_UGT;
   2896     break;
   2897   case ICmpInst::Predicate::ICMP_UGT:
   2898     //  x u> x & (-1 >> y)    ->    x u> (-1 >> y)
   2899     assert(X == I.getOperand(0) && "instsimplify took care of commut. variant");
   2900     DstPred = ICmpInst::Predicate::ICMP_UGT;
   2901     break;
   2902   case ICmpInst::Predicate::ICMP_UGE:
   2903     //  x & (-1 >> y) u>= x    ->    x u<= (-1 >> y)
   2904     assert(X == I.getOperand(1) && "instsimplify took care of commut. variant");
   2905     DstPred = ICmpInst::Predicate::ICMP_ULE;
   2906     break;
   2907   case ICmpInst::Predicate::ICMP_ULT:
   2908     //  x & (-1 >> y) u< x    ->    x u> (-1 >> y)
   2909     assert(X == I.getOperand(1) && "instsimplify took care of commut. variant");
   2910     DstPred = ICmpInst::Predicate::ICMP_UGT;
   2911     break;
   2912   case ICmpInst::Predicate::ICMP_ULE:
   2913     //  x u<= x & (-1 >> y)    ->    x u<= (-1 >> y)
   2914     assert(X == I.getOperand(0) && "instsimplify took care of commut. variant");
   2915     DstPred = ICmpInst::Predicate::ICMP_ULE;
   2916     break;
   2917   case ICmpInst::Predicate::ICMP_SGT:
   2918     //  x s> x & (-1 >> y)    ->    x s> (-1 >> y)
   2919     if (X != I.getOperand(0)) // X must be on LHS of comparison!
   2920       return nullptr;         // Ignore the other case.
   2921     DstPred = ICmpInst::Predicate::ICMP_SGT;
   2922     break;
   2923   case ICmpInst::Predicate::ICMP_SGE:
   2924     //  x & (-1 >> y) s>= x    ->    x s<= (-1 >> y)
   2925     if (X != I.getOperand(1)) // X must be on RHS of comparison!
   2926       return nullptr;         // Ignore the other case.
   2927     if (!match(M, m_Constant())) // Can not do this fold with non-constant.
   2928       return nullptr;
   2929     if (!match(M, m_NonNegative())) // Must not have any -1 vector elements.
   2930       return nullptr;
   2931     DstPred = ICmpInst::Predicate::ICMP_SLE;
   2932     break;
   2933   case ICmpInst::Predicate::ICMP_SLT:
   2934     //  x & (-1 >> y) s< x    ->    x s> (-1 >> y)
   2935     if (X != I.getOperand(1)) // X must be on RHS of comparison!
   2936       return nullptr;         // Ignore the other case.
   2937     if (!match(M, m_Constant())) // Can not do this fold with non-constant.
   2938       return nullptr;
   2939     if (!match(M, m_NonNegative())) // Must not have any -1 vector elements.
   2940       return nullptr;
   2941     DstPred = ICmpInst::Predicate::ICMP_SGT;
   2942     break;
   2943   case ICmpInst::Predicate::ICMP_SLE:
   2944     //  x s<= x & (-1 >> y)    ->    x s<= (-1 >> y)
   2945     if (X != I.getOperand(0)) // X must be on LHS of comparison!
   2946       return nullptr;         // Ignore the other case.
   2947     DstPred = ICmpInst::Predicate::ICMP_SLE;
   2948     break;
   2949   default:
   2950     llvm_unreachable("All possible folds are handled.");
   2951   }
   2952 
   2953   return Builder.CreateICmp(DstPred, X, M);
   2954 }
   2955 
   2956 /// Some comparisons can be simplified.
   2957 /// In this case, we are looking for comparisons that look like
   2958 /// a check for a lossy signed truncation.
   2959 /// Folds:   (MaskedBits is a constant.)
   2960 ///   ((%x << MaskedBits) a>> MaskedBits) SrcPred %x
   2961 /// Into:
   2962 ///   (add %x, (1 << (KeptBits-1))) DstPred (1 << KeptBits)
   2963 /// Where  KeptBits = bitwidth(%x) - MaskedBits
   2964 static Value *
   2965 foldICmpWithTruncSignExtendedVal(ICmpInst &I,
   2966                                  InstCombiner::BuilderTy &Builder) {
   2967   ICmpInst::Predicate SrcPred;
   2968   Value *X;
   2969   const APInt *C0, *C1; // FIXME: non-splats, potentially with undef.
   2970   // We are ok with 'shl' having multiple uses, but 'ashr' must be one-use.
   2971   if (!match(&I, m_c_ICmp(SrcPred,
   2972                           m_OneUse(m_AShr(m_Shl(m_Value(X), m_APInt(C0)),
   2973                                           m_APInt(C1))),
   2974                           m_Deferred(X))))
   2975     return nullptr;
   2976 
   2977   // Potential handling of non-splats: for each element:
   2978   //  * if both are undef, replace with constant 0.
   2979   //    Because (1<<0) is OK and is 1, and ((1<<0)>>1) is also OK and is 0.
   2980   //  * if both are not undef, and are different, bailout.
   2981   //  * else, only one is undef, then pick the non-undef one.
   2982 
   2983   // The shift amount must be equal.
   2984   if (*C0 != *C1)
   2985     return nullptr;
   2986   const APInt &MaskedBits = *C0;
   2987   assert(MaskedBits != 0 && "shift by zero should be folded away already.");
   2988 
   2989   ICmpInst::Predicate DstPred;
   2990   switch (SrcPred) {
   2991   case ICmpInst::Predicate::ICMP_EQ:
   2992     // ((%x << MaskedBits) a>> MaskedBits) == %x
   2993     //   =>
   2994     // (add %x, (1 << (KeptBits-1))) u< (1 << KeptBits)
   2995     DstPred = ICmpInst::Predicate::ICMP_ULT;
   2996     break;
   2997   case ICmpInst::Predicate::ICMP_NE:
   2998     // ((%x << MaskedBits) a>> MaskedBits) != %x
   2999     //   =>
   3000     // (add %x, (1 << (KeptBits-1))) u>= (1 << KeptBits)
   3001     DstPred = ICmpInst::Predicate::ICMP_UGE;
   3002     break;
   3003   // FIXME: are more folds possible?
   3004   default:
   3005     return nullptr;
   3006   }
   3007 
   3008   auto *XType = X->getType();
   3009   const unsigned XBitWidth = XType->getScalarSizeInBits();
   3010   const APInt BitWidth = APInt(XBitWidth, XBitWidth);
   3011   assert(BitWidth.ugt(MaskedBits) && "shifts should leave some bits untouched");
   3012 
   3013   // KeptBits = bitwidth(%x) - MaskedBits
   3014   const APInt KeptBits = BitWidth - MaskedBits;
   3015   assert(KeptBits.ugt(0) && KeptBits.ult(BitWidth) && "unreachable");
   3016   // ICmpCst = (1 << KeptBits)
   3017   const APInt ICmpCst = APInt(XBitWidth, 1).shl(KeptBits);
   3018   assert(ICmpCst.isPowerOf2());
   3019   // AddCst = (1 << (KeptBits-1))
   3020   const APInt AddCst = ICmpCst.lshr(1);
   3021   assert(AddCst.ult(ICmpCst) && AddCst.isPowerOf2());
   3022 
   3023   // T0 = add %x, AddCst
   3024   Value *T0 = Builder.CreateAdd(X, ConstantInt::get(XType, AddCst));
   3025   // T1 = T0 DstPred ICmpCst
   3026   Value *T1 = Builder.CreateICmp(DstPred, T0, ConstantInt::get(XType, ICmpCst));
   3027 
   3028   return T1;
   3029 }
   3030 
   3031 /// Try to fold icmp (binop), X or icmp X, (binop).
   3032 /// TODO: A large part of this logic is duplicated in InstSimplify's
   3033 /// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
   3034 /// duplication.
   3035 Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
   3036   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   3037 
   3038   // Special logic for binary operators.
   3039   BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0);
   3040   BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1);
   3041   if (!BO0 && !BO1)
   3042     return nullptr;
   3043 
   3044   const CmpInst::Predicate Pred = I.getPredicate();
   3045   bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
   3046   if (BO0 && isa<OverflowingBinaryOperator>(BO0))
   3047     NoOp0WrapProblem =
   3048         ICmpInst::isEquality(Pred) ||
   3049         (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
   3050         (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
   3051   if (BO1 && isa<OverflowingBinaryOperator>(BO1))
   3052     NoOp1WrapProblem =
   3053         ICmpInst::isEquality(Pred) ||
   3054         (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
   3055         (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());
   3056 
   3057   // Analyze the case when either Op0 or Op1 is an add instruction.
   3058   // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
   3059   Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
   3060   if (BO0 && BO0->getOpcode() == Instruction::Add) {
   3061     A = BO0->getOperand(0);
   3062     B = BO0->getOperand(1);
   3063   }
   3064   if (BO1 && BO1->getOpcode() == Instruction::Add) {
   3065     C = BO1->getOperand(0);
   3066     D = BO1->getOperand(1);
   3067   }
   3068 
   3069   // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
   3070   if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
   3071     return new ICmpInst(Pred, A == Op1 ? B : A,
   3072                         Constant::getNullValue(Op1->getType()));
   3073 
   3074   // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
   3075   if ((C == Op0 || D == Op0) && NoOp1WrapProblem)
   3076     return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()),
   3077                         C == Op0 ? D : C);
   3078 
   3079   // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow.
   3080   if (A && C && (A == C || A == D || B == C || B == D) && NoOp0WrapProblem &&
   3081       NoOp1WrapProblem &&
   3082       // Try not to increase register pressure.
   3083       BO0->hasOneUse() && BO1->hasOneUse()) {
   3084     // Determine Y and Z in the form icmp (X+Y), (X+Z).
   3085     Value *Y, *Z;
   3086     if (A == C) {
   3087       // C + B == C + D  ->  B == D
   3088       Y = B;
   3089       Z = D;
   3090     } else if (A == D) {
   3091       // D + B == C + D  ->  B == C
   3092       Y = B;
   3093       Z = C;
   3094     } else if (B == C) {
   3095       // A + C == C + D  ->  A == D
   3096       Y = A;
   3097       Z = D;
   3098     } else {
   3099       assert(B == D);
   3100       // A + D == C + D  ->  A == C
   3101       Y = A;
   3102       Z = C;
   3103     }
   3104     return new ICmpInst(Pred, Y, Z);
   3105   }
   3106 
   3107   // icmp slt (X + -1), Y -> icmp sle X, Y
   3108   if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT &&
   3109       match(B, m_AllOnes()))
   3110     return new ICmpInst(CmpInst::ICMP_SLE, A, Op1);
   3111 
   3112   // icmp sge (X + -1), Y -> icmp sgt X, Y
   3113   if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE &&
   3114       match(B, m_AllOnes()))
   3115     return new ICmpInst(CmpInst::ICMP_SGT, A, Op1);
   3116 
   3117   // icmp sle (X + 1), Y -> icmp slt X, Y
   3118   if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && match(B, m_One()))
   3119     return new ICmpInst(CmpInst::ICMP_SLT, A, Op1);
   3120 
   3121   // icmp sgt (X + 1), Y -> icmp sge X, Y
   3122   if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && match(B, m_One()))
   3123     return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
   3124 
   3125   // icmp sgt X, (Y + -1) -> icmp sge X, Y
   3126   if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT &&
   3127       match(D, m_AllOnes()))
   3128     return new ICmpInst(CmpInst::ICMP_SGE, Op0, C);
   3129 
   3130   // icmp sle X, (Y + -1) -> icmp slt X, Y
   3131   if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE &&
   3132       match(D, m_AllOnes()))
   3133     return new ICmpInst(CmpInst::ICMP_SLT, Op0, C);
   3134 
   3135   // icmp sge X, (Y + 1) -> icmp sgt X, Y
   3136   if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE && match(D, m_One()))
   3137     return new ICmpInst(CmpInst::ICMP_SGT, Op0, C);
   3138 
   3139   // icmp slt X, (Y + 1) -> icmp sle X, Y
   3140   if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT && match(D, m_One()))
   3141     return new ICmpInst(CmpInst::ICMP_SLE, Op0, C);
   3142 
   3143   // TODO: The subtraction-related identities shown below also hold, but
   3144   // canonicalization from (X -nuw 1) to (X + -1) means that the combinations
   3145   // wouldn't happen even if they were implemented.
   3146   //
   3147   // icmp ult (X - 1), Y -> icmp ule X, Y
   3148   // icmp uge (X - 1), Y -> icmp ugt X, Y
   3149   // icmp ugt X, (Y - 1) -> icmp uge X, Y
   3150   // icmp ule X, (Y - 1) -> icmp ult X, Y
   3151 
   3152   // icmp ule (X + 1), Y -> icmp ult X, Y
   3153   if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_ULE && match(B, m_One()))
   3154     return new ICmpInst(CmpInst::ICMP_ULT, A, Op1);
   3155 
   3156   // icmp ugt (X + 1), Y -> icmp uge X, Y
   3157   if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_UGT && match(B, m_One()))
   3158     return new ICmpInst(CmpInst::ICMP_UGE, A, Op1);
   3159 
   3160   // icmp uge X, (Y + 1) -> icmp ugt X, Y
   3161   if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_UGE && match(D, m_One()))
   3162     return new ICmpInst(CmpInst::ICMP_UGT, Op0, C);
   3163 
   3164   // icmp ult X, (Y + 1) -> icmp ule X, Y
   3165   if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_ULT && match(D, m_One()))
   3166     return new ICmpInst(CmpInst::ICMP_ULE, Op0, C);
   3167 
   3168   // if C1 has greater magnitude than C2:
   3169   //  icmp (X + C1), (Y + C2) -> icmp (X + C3), Y
   3170   //  s.t. C3 = C1 - C2
   3171   //
   3172   // if C2 has greater magnitude than C1:
   3173   //  icmp (X + C1), (Y + C2) -> icmp X, (Y + C3)
   3174   //  s.t. C3 = C2 - C1
   3175   if (A && C && NoOp0WrapProblem && NoOp1WrapProblem &&
   3176       (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned())
   3177     if (ConstantInt *C1 = dyn_cast<ConstantInt>(B))
   3178       if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) {
   3179         const APInt &AP1 = C1->getValue();
   3180         const APInt &AP2 = C2->getValue();
   3181         if (AP1.isNegative() == AP2.isNegative()) {
   3182           APInt AP1Abs = C1->getValue().abs();
   3183           APInt AP2Abs = C2->getValue().abs();
   3184           if (AP1Abs.uge(AP2Abs)) {
   3185             ConstantInt *C3 = Builder.getInt(AP1 - AP2);
   3186             Value *NewAdd = Builder.CreateNSWAdd(A, C3);
   3187             return new ICmpInst(Pred, NewAdd, C);
   3188           } else {
   3189             ConstantInt *C3 = Builder.getInt(AP2 - AP1);
   3190             Value *NewAdd = Builder.CreateNSWAdd(C, C3);
   3191             return new ICmpInst(Pred, A, NewAdd);
   3192           }
   3193         }
   3194       }
   3195 
   3196   // Analyze the case when either Op0 or Op1 is a sub instruction.
   3197   // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
   3198   A = nullptr;
   3199   B = nullptr;
   3200   C = nullptr;
   3201   D = nullptr;
   3202   if (BO0 && BO0->getOpcode() == Instruction::Sub) {
   3203     A = BO0->getOperand(0);
   3204     B = BO0->getOperand(1);
   3205   }
   3206   if (BO1 && BO1->getOpcode() == Instruction::Sub) {
   3207     C = BO1->getOperand(0);
   3208     D = BO1->getOperand(1);
   3209   }
   3210 
   3211   // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow.
   3212   if (A == Op1 && NoOp0WrapProblem)
   3213     return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B);
   3214   // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow.
   3215   if (C == Op0 && NoOp1WrapProblem)
   3216     return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType()));
   3217 
   3218   // (A - B) >u A --> A <u B
   3219   if (A == Op1 && Pred == ICmpInst::ICMP_UGT)
   3220     return new ICmpInst(ICmpInst::ICMP_ULT, A, B);
   3221   // C <u (C - D) --> C <u D
   3222   if (C == Op0 && Pred == ICmpInst::ICMP_ULT)
   3223     return new ICmpInst(ICmpInst::ICMP_ULT, C, D);
   3224 
   3225   // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow.
   3226   if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem &&
   3227       // Try not to increase register pressure.
   3228       BO0->hasOneUse() && BO1->hasOneUse())
   3229     return new ICmpInst(Pred, A, C);
   3230   // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow.
   3231   if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem &&
   3232       // Try not to increase register pressure.
   3233       BO0->hasOneUse() && BO1->hasOneUse())
   3234     return new ICmpInst(Pred, D, B);
   3235 
   3236   // icmp (0-X) < cst --> x > -cst
   3237   if (NoOp0WrapProblem && ICmpInst::isSigned(Pred)) {
   3238     Value *X;
   3239     if (match(BO0, m_Neg(m_Value(X))))
   3240       if (Constant *RHSC = dyn_cast<Constant>(Op1))
   3241         if (RHSC->isNotMinSignedValue())
   3242           return new ICmpInst(I.getSwappedPredicate(), X,
   3243                               ConstantExpr::getNeg(RHSC));
   3244   }
   3245 
   3246   BinaryOperator *SRem = nullptr;
   3247   // icmp (srem X, Y), Y
   3248   if (BO0 && BO0->getOpcode() == Instruction::SRem && Op1 == BO0->getOperand(1))
   3249     SRem = BO0;
   3250   // icmp Y, (srem X, Y)
   3251   else if (BO1 && BO1->getOpcode() == Instruction::SRem &&
   3252            Op0 == BO1->getOperand(1))
   3253     SRem = BO1;
   3254   if (SRem) {
   3255     // We don't check hasOneUse to avoid increasing register pressure because
   3256     // the value we use is the same value this instruction was already using.
   3257     switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) {
   3258     default:
   3259       break;
   3260     case ICmpInst::ICMP_EQ:
   3261       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
   3262     case ICmpInst::ICMP_NE:
   3263       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
   3264     case ICmpInst::ICMP_SGT:
   3265     case ICmpInst::ICMP_SGE:
   3266       return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1),
   3267                           Constant::getAllOnesValue(SRem->getType()));
   3268     case ICmpInst::ICMP_SLT:
   3269     case ICmpInst::ICMP_SLE:
   3270       return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1),
   3271                           Constant::getNullValue(SRem->getType()));
   3272     }
   3273   }
   3274 
   3275   if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && BO0->hasOneUse() &&
   3276       BO1->hasOneUse() && BO0->getOperand(1) == BO1->getOperand(1)) {
   3277     switch (BO0->getOpcode()) {
   3278     default:
   3279       break;
   3280     case Instruction::Add:
   3281     case Instruction::Sub:
   3282     case Instruction::Xor: {
   3283       if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
   3284         return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
   3285 
   3286       const APInt *C;
   3287       if (match(BO0->getOperand(1), m_APInt(C))) {
   3288         // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
   3289         if (C->isSignMask()) {
   3290           ICmpInst::Predicate NewPred =
   3291               I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate();
   3292           return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0));
   3293         }
   3294 
   3295         // icmp u/s (a ^ maxsignval), (b ^ maxsignval) --> icmp s/u' a, b
   3296         if (BO0->getOpcode() == Instruction::Xor && C->isMaxSignedValue()) {
   3297           ICmpInst::Predicate NewPred =
   3298               I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate();
   3299           NewPred = I.getSwappedPredicate(NewPred);
   3300           return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0));
   3301         }
   3302       }
   3303       break;
   3304     }
   3305     case Instruction::Mul: {
   3306       if (!I.isEquality())
   3307         break;
   3308 
   3309       const APInt *C;
   3310       if (match(BO0->getOperand(1), m_APInt(C)) && !C->isNullValue() &&
   3311           !C->isOneValue()) {
   3312         // icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask)
   3313         // Mask = -1 >> count-trailing-zeros(C).
   3314         if (unsigned TZs = C->countTrailingZeros()) {
   3315           Constant *Mask = ConstantInt::get(
   3316               BO0->getType(),
   3317               APInt::getLowBitsSet(C->getBitWidth(), C->getBitWidth() - TZs));
   3318           Value *And1 = Builder.CreateAnd(BO0->getOperand(0), Mask);
   3319           Value *And2 = Builder.CreateAnd(BO1->getOperand(0), Mask);
   3320           return new ICmpInst(Pred, And1, And2);
   3321         }
   3322         // If there are no trailing zeros in the multiplier, just eliminate
   3323         // the multiplies (no masking is needed):
   3324         // icmp eq/ne (X * C), (Y * C) --> icmp eq/ne X, Y
   3325         return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
   3326       }
   3327       break;
   3328     }
   3329     case Instruction::UDiv:
   3330     case Instruction::LShr:
   3331       if (I.isSigned() || !BO0->isExact() || !BO1->isExact())
   3332         break;
   3333       return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
   3334 
   3335     case Instruction::SDiv:
   3336       if (!I.isEquality() || !BO0->isExact() || !BO1->isExact())
   3337         break;
   3338       return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
   3339 
   3340     case Instruction::AShr:
   3341       if (!BO0->isExact() || !BO1->isExact())
   3342         break;
   3343       return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
   3344 
   3345     case Instruction::Shl: {
   3346       bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap();
   3347       bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap();
   3348       if (!NUW && !NSW)
   3349         break;
   3350       if (!NSW && I.isSigned())
   3351         break;
   3352       return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
   3353     }
   3354     }
   3355   }
   3356 
   3357   if (BO0) {
   3358     // Transform  A & (L - 1) `ult` L --> L != 0
   3359     auto LSubOne = m_Add(m_Specific(Op1), m_AllOnes());
   3360     auto BitwiseAnd = m_c_And(m_Value(), LSubOne);
   3361 
   3362     if (match(BO0, BitwiseAnd) && Pred == ICmpInst::ICMP_ULT) {
   3363       auto *Zero = Constant::getNullValue(BO0->getType());
   3364       return new ICmpInst(ICmpInst::ICMP_NE, Op1, Zero);
   3365     }
   3366   }
   3367 
   3368   if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder))
   3369     return replaceInstUsesWith(I, V);
   3370 
   3371   if (Value *V = foldICmpWithTruncSignExtendedVal(I, Builder))
   3372     return replaceInstUsesWith(I, V);
   3373 
   3374   return nullptr;
   3375 }
   3376 
   3377 /// Fold icmp Pred min|max(X, Y), X.
   3378 static Instruction *foldICmpWithMinMax(ICmpInst &Cmp) {
   3379   ICmpInst::Predicate Pred = Cmp.getPredicate();
   3380   Value *Op0 = Cmp.getOperand(0);
   3381   Value *X = Cmp.getOperand(1);
   3382 
   3383   // Canonicalize minimum or maximum operand to LHS of the icmp.
   3384   if (match(X, m_c_SMin(m_Specific(Op0), m_Value())) ||
   3385       match(X, m_c_SMax(m_Specific(Op0), m_Value())) ||
   3386       match(X, m_c_UMin(m_Specific(Op0), m_Value())) ||
   3387       match(X, m_c_UMax(m_Specific(Op0), m_Value()))) {
   3388     std::swap(Op0, X);
   3389     Pred = Cmp.getSwappedPredicate();
   3390   }
   3391 
   3392   Value *Y;
   3393   if (match(Op0, m_c_SMin(m_Specific(X), m_Value(Y)))) {
   3394     // smin(X, Y)  == X --> X s<= Y
   3395     // smin(X, Y) s>= X --> X s<= Y
   3396     if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SGE)
   3397       return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
   3398 
   3399     // smin(X, Y) != X --> X s> Y
   3400     // smin(X, Y) s< X --> X s> Y
   3401     if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SLT)
   3402       return new ICmpInst(ICmpInst::ICMP_SGT, X, Y);
   3403 
   3404     // These cases should be handled in InstSimplify:
   3405     // smin(X, Y) s<= X --> true
   3406     // smin(X, Y) s> X --> false
   3407     return nullptr;
   3408   }
   3409 
   3410   if (match(Op0, m_c_SMax(m_Specific(X), m_Value(Y)))) {
   3411     // smax(X, Y)  == X --> X s>= Y
   3412     // smax(X, Y) s<= X --> X s>= Y
   3413     if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLE)
   3414       return new ICmpInst(ICmpInst::ICMP_SGE, X, Y);
   3415 
   3416     // smax(X, Y) != X --> X s< Y
   3417     // smax(X, Y) s> X --> X s< Y
   3418     if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SGT)
   3419       return new ICmpInst(ICmpInst::ICMP_SLT, X, Y);
   3420 
   3421     // These cases should be handled in InstSimplify:
   3422     // smax(X, Y) s>= X --> true
   3423     // smax(X, Y) s< X --> false
   3424     return nullptr;
   3425   }
   3426 
   3427   if (match(Op0, m_c_UMin(m_Specific(X), m_Value(Y)))) {
   3428     // umin(X, Y)  == X --> X u<= Y
   3429     // umin(X, Y) u>= X --> X u<= Y
   3430     if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_UGE)
   3431       return new ICmpInst(ICmpInst::ICMP_ULE, X, Y);
   3432 
   3433     // umin(X, Y) != X --> X u> Y
   3434     // umin(X, Y) u< X --> X u> Y
   3435     if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_ULT)
   3436       return new ICmpInst(ICmpInst::ICMP_UGT, X, Y);
   3437 
   3438     // These cases should be handled in InstSimplify:
   3439     // umin(X, Y) u<= X --> true
   3440     // umin(X, Y) u> X --> false
   3441     return nullptr;
   3442   }
   3443 
   3444   if (match(Op0, m_c_UMax(m_Specific(X), m_Value(Y)))) {
   3445     // umax(X, Y)  == X --> X u>= Y
   3446     // umax(X, Y) u<= X --> X u>= Y
   3447     if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_ULE)
   3448       return new ICmpInst(ICmpInst::ICMP_UGE, X, Y);
   3449 
   3450     // umax(X, Y) != X --> X u< Y
   3451     // umax(X, Y) u> X --> X u< Y
   3452     if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_UGT)
   3453       return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
   3454 
   3455     // These cases should be handled in InstSimplify:
   3456     // umax(X, Y) u>= X --> true
   3457     // umax(X, Y) u< X --> false
   3458     return nullptr;
   3459   }
   3460 
   3461   return nullptr;
   3462 }
   3463 
   3464 Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
   3465   if (!I.isEquality())
   3466     return nullptr;
   3467 
   3468   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   3469   const CmpInst::Predicate Pred = I.getPredicate();
   3470   Value *A, *B, *C, *D;
   3471   if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
   3472     if (A == Op1 || B == Op1) { // (A^B) == A  ->  B == 0
   3473       Value *OtherVal = A == Op1 ? B : A;
   3474       return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType()));
   3475     }
   3476 
   3477     if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
   3478       // A^c1 == C^c2 --> A == C^(c1^c2)
   3479       ConstantInt *C1, *C2;
   3480       if (match(B, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2)) &&
   3481           Op1->hasOneUse()) {
   3482         Constant *NC = Builder.getInt(C1->getValue() ^ C2->getValue());
   3483         Value *Xor = Builder.CreateXor(C, NC);
   3484         return new ICmpInst(Pred, A, Xor);
   3485       }
   3486 
   3487       // A^B == A^D -> B == D
   3488       if (A == C)
   3489         return new ICmpInst(Pred, B, D);
   3490       if (A == D)
   3491         return new ICmpInst(Pred, B, C);
   3492       if (B == C)
   3493         return new ICmpInst(Pred, A, D);
   3494       if (B == D)
   3495         return new ICmpInst(Pred, A, C);
   3496     }
   3497   }
   3498 
   3499   if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) {
   3500     // A == (A^B)  ->  B == 0
   3501     Value *OtherVal = A == Op0 ? B : A;
   3502     return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType()));
   3503   }
   3504 
   3505   // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
   3506   if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
   3507       match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
   3508     Value *X = nullptr, *Y = nullptr, *Z = nullptr;
   3509 
   3510     if (A == C) {
   3511       X = B;
   3512       Y = D;
   3513       Z = A;
   3514     } else if (A == D) {
   3515       X = B;
   3516       Y = C;
   3517       Z = A;
   3518     } else if (B == C) {
   3519       X = A;
   3520       Y = D;
   3521       Z = B;
   3522     } else if (B == D) {
   3523       X = A;
   3524       Y = C;
   3525       Z = B;
   3526     }
   3527 
   3528     if (X) { // Build (X^Y) & Z
   3529       Op1 = Builder.CreateXor(X, Y);
   3530       Op1 = Builder.CreateAnd(Op1, Z);
   3531       I.setOperand(0, Op1);
   3532       I.setOperand(1, Constant::getNullValue(Op1->getType()));
   3533       return &I;
   3534     }
   3535   }
   3536 
   3537   // Transform (zext A) == (B & (1<<X)-1) --> A == (trunc B)
   3538   // and       (B & (1<<X)-1) == (zext A) --> A == (trunc B)
   3539   ConstantInt *Cst1;
   3540   if ((Op0->hasOneUse() && match(Op0, m_ZExt(m_Value(A))) &&
   3541        match(Op1, m_And(m_Value(B), m_ConstantInt(Cst1)))) ||
   3542       (Op1->hasOneUse() && match(Op0, m_And(m_Value(B), m_ConstantInt(Cst1))) &&
   3543        match(Op1, m_ZExt(m_Value(A))))) {
   3544     APInt Pow2 = Cst1->getValue() + 1;
   3545     if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) &&
   3546         Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth())
   3547       return new ICmpInst(Pred, A, Builder.CreateTrunc(B, A->getType()));
   3548   }
   3549 
   3550   // (A >> C) == (B >> C) --> (A^B) u< (1 << C)
   3551   // For lshr and ashr pairs.
   3552   if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) &&
   3553        match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) ||
   3554       (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) &&
   3555        match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) {
   3556     unsigned TypeBits = Cst1->getBitWidth();
   3557     unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
   3558     if (ShAmt < TypeBits && ShAmt != 0) {
   3559       ICmpInst::Predicate NewPred =
   3560           Pred == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
   3561       Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted");
   3562       APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt);
   3563       return new ICmpInst(NewPred, Xor, Builder.getInt(CmpVal));
   3564     }
   3565   }
   3566 
   3567   // (A << C) == (B << C) --> ((A^B) & (~0U >> C)) == 0
   3568   if (match(Op0, m_OneUse(m_Shl(m_Value(A), m_ConstantInt(Cst1)))) &&
   3569       match(Op1, m_OneUse(m_Shl(m_Value(B), m_Specific(Cst1))))) {
   3570     unsigned TypeBits = Cst1->getBitWidth();
   3571     unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
   3572     if (ShAmt < TypeBits && ShAmt != 0) {
   3573       Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted");
   3574       APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt);
   3575       Value *And = Builder.CreateAnd(Xor, Builder.getInt(AndVal),
   3576                                       I.getName() + ".mask");
   3577       return new ICmpInst(Pred, And, Constant::getNullValue(Cst1->getType()));
   3578     }
   3579   }
   3580 
   3581   // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
   3582   // "icmp (and X, mask), cst"
   3583   uint64_t ShAmt = 0;
   3584   if (Op0->hasOneUse() &&
   3585       match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), m_ConstantInt(ShAmt))))) &&
   3586       match(Op1, m_ConstantInt(Cst1)) &&
   3587       // Only do this when A has multiple uses.  This is most important to do
   3588       // when it exposes other optimizations.
   3589       !A->hasOneUse()) {
   3590     unsigned ASize = cast<IntegerType>(A->getType())->getPrimitiveSizeInBits();
   3591 
   3592     if (ShAmt < ASize) {
   3593       APInt MaskV =
   3594           APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits());
   3595       MaskV <<= ShAmt;
   3596 
   3597       APInt CmpV = Cst1->getValue().zext(ASize);
   3598       CmpV <<= ShAmt;
   3599 
   3600       Value *Mask = Builder.CreateAnd(A, Builder.getInt(MaskV));
   3601       return new ICmpInst(Pred, Mask, Builder.getInt(CmpV));
   3602     }
   3603   }
   3604 
   3605   // If both operands are byte-swapped or bit-reversed, just compare the
   3606   // original values.
   3607   // TODO: Move this to a function similar to foldICmpIntrinsicWithConstant()
   3608   // and handle more intrinsics.
   3609   if ((match(Op0, m_BSwap(m_Value(A))) && match(Op1, m_BSwap(m_Value(B)))) ||
   3610       (match(Op0, m_BitReverse(m_Value(A))) &&
   3611        match(Op1, m_BitReverse(m_Value(B)))))
   3612     return new ICmpInst(Pred, A, B);
   3613 
   3614   return nullptr;
   3615 }
   3616 
   3617 /// Handle icmp (cast x to y), (cast/cst). We only handle extending casts so
   3618 /// far.
   3619 Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) {
   3620   const CastInst *LHSCI = cast<CastInst>(ICmp.getOperand(0));
   3621   Value *LHSCIOp        = LHSCI->getOperand(0);
   3622   Type *SrcTy     = LHSCIOp->getType();
   3623   Type *DestTy    = LHSCI->getType();
   3624   Value *RHSCIOp;
   3625 
   3626   // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
   3627   // integer type is the same size as the pointer type.
   3628   const auto& CompatibleSizes = [&](Type* SrcTy, Type* DestTy) -> bool {
   3629     if (isa<VectorType>(SrcTy)) {
   3630       SrcTy = cast<VectorType>(SrcTy)->getElementType();
   3631       DestTy = cast<VectorType>(DestTy)->getElementType();
   3632     }
   3633     return DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth();
   3634   };
   3635   if (LHSCI->getOpcode() == Instruction::PtrToInt &&
   3636       CompatibleSizes(SrcTy, DestTy)) {
   3637     Value *RHSOp = nullptr;
   3638     if (auto *RHSC = dyn_cast<PtrToIntOperator>(ICmp.getOperand(1))) {
   3639       Value *RHSCIOp = RHSC->getOperand(0);
   3640       if (RHSCIOp->getType()->getPointerAddressSpace() ==
   3641           LHSCIOp->getType()->getPointerAddressSpace()) {
   3642         RHSOp = RHSC->getOperand(0);
   3643         // If the pointer types don't match, insert a bitcast.
   3644         if (LHSCIOp->getType() != RHSOp->getType())
   3645           RHSOp = Builder.CreateBitCast(RHSOp, LHSCIOp->getType());
   3646       }
   3647     } else if (auto *RHSC = dyn_cast<Constant>(ICmp.getOperand(1))) {
   3648       RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
   3649     }
   3650 
   3651     if (RHSOp)
   3652       return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSOp);
   3653   }
   3654 
   3655   // The code below only handles extension cast instructions, so far.
   3656   // Enforce this.
   3657   if (LHSCI->getOpcode() != Instruction::ZExt &&
   3658       LHSCI->getOpcode() != Instruction::SExt)
   3659     return nullptr;
   3660 
   3661   bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
   3662   bool isSignedCmp = ICmp.isSigned();
   3663 
   3664   if (auto *CI = dyn_cast<CastInst>(ICmp.getOperand(1))) {
   3665     // Not an extension from the same type?
   3666     RHSCIOp = CI->getOperand(0);
   3667     if (RHSCIOp->getType() != LHSCIOp->getType())
   3668       return nullptr;
   3669 
   3670     // If the signedness of the two casts doesn't agree (i.e. one is a sext
   3671     // and the other is a zext), then we can't handle this.
   3672     if (CI->getOpcode() != LHSCI->getOpcode())
   3673       return nullptr;
   3674 
   3675     // Deal with equality cases early.
   3676     if (ICmp.isEquality())
   3677       return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSCIOp);
   3678 
   3679     // A signed comparison of sign extended values simplifies into a
   3680     // signed comparison.
   3681     if (isSignedCmp && isSignedExt)
   3682       return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSCIOp);
   3683 
   3684     // The other three cases all fold into an unsigned comparison.
   3685     return new ICmpInst(ICmp.getUnsignedPredicate(), LHSCIOp, RHSCIOp);
   3686   }
   3687 
   3688   // If we aren't dealing with a constant on the RHS, exit early.
   3689   auto *C = dyn_cast<Constant>(ICmp.getOperand(1));
   3690   if (!C)
   3691     return nullptr;
   3692 
   3693   // Compute the constant that would happen if we truncated to SrcTy then
   3694   // re-extended to DestTy.
   3695   Constant *Res1 = ConstantExpr::getTrunc(C, SrcTy);
   3696   Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), Res1, DestTy);
   3697 
   3698   // If the re-extended constant didn't change...
   3699   if (Res2 == C) {
   3700     // Deal with equality cases early.
   3701     if (ICmp.isEquality())
   3702       return new ICmpInst(ICmp.getPredicate(), LHSCIOp, Res1);
   3703 
   3704     // A signed comparison of sign extended values simplifies into a
   3705     // signed comparison.
   3706     if (isSignedExt && isSignedCmp)
   3707       return new ICmpInst(ICmp.getPredicate(), LHSCIOp, Res1);
   3708 
   3709     // The other three cases all fold into an unsigned comparison.
   3710     return new ICmpInst(ICmp.getUnsignedPredicate(), LHSCIOp, Res1);
   3711   }
   3712 
   3713   // The re-extended constant changed, partly changed (in the case of a vector),
   3714   // or could not be determined to be equal (in the case of a constant
   3715   // expression), so the constant cannot be represented in the shorter type.
   3716   // Consequently, we cannot emit a simple comparison.
   3717   // All the cases that fold to true or false will have already been handled
   3718   // by SimplifyICmpInst, so only deal with the tricky case.
   3719 
   3720   if (isSignedCmp || !isSignedExt || !isa<ConstantInt>(C))
   3721     return nullptr;
   3722 
   3723   // Evaluate the comparison for LT (we invert for GT below). LE and GE cases
   3724   // should have been folded away previously and not enter in here.
   3725 
   3726   // We're performing an unsigned comp with a sign extended value.
   3727   // This is true if the input is >= 0. [aka >s -1]
   3728   Constant *NegOne = Constant::getAllOnesValue(SrcTy);
   3729   Value *Result = Builder.CreateICmpSGT(LHSCIOp, NegOne, ICmp.getName());
   3730 
   3731   // Finally, return the value computed.
   3732   if (ICmp.getPredicate() == ICmpInst::ICMP_ULT)
   3733     return replaceInstUsesWith(ICmp, Result);
   3734 
   3735   assert(ICmp.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!");
   3736   return BinaryOperator::CreateNot(Result);
   3737 }
   3738 
   3739 bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
   3740                                          Value *RHS, Instruction &OrigI,
   3741                                          Value *&Result, Constant *&Overflow) {
   3742   if (OrigI.isCommutative() && isa<Constant>(LHS) && !isa<Constant>(RHS))
   3743     std::swap(LHS, RHS);
   3744 
   3745   auto SetResult = [&](Value *OpResult, Constant *OverflowVal, bool ReuseName) {
   3746     Result = OpResult;
   3747     Overflow = OverflowVal;
   3748     if (ReuseName)
   3749       Result->takeName(&OrigI);
   3750     return true;
   3751   };
   3752 
   3753   // If the overflow check was an add followed by a compare, the insertion point
   3754   // may be pointing to the compare.  We want to insert the new instructions
   3755   // before the add in case there are uses of the add between the add and the
   3756   // compare.
   3757   Builder.SetInsertPoint(&OrigI);
   3758 
   3759   switch (OCF) {
   3760   case OCF_INVALID:
   3761     llvm_unreachable("bad overflow check kind!");
   3762 
   3763   case OCF_UNSIGNED_ADD: {
   3764     OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI);
   3765     if (OR == OverflowResult::NeverOverflows)
   3766       return SetResult(Builder.CreateNUWAdd(LHS, RHS), Builder.getFalse(),
   3767                        true);
   3768 
   3769     if (OR == OverflowResult::AlwaysOverflows)
   3770       return SetResult(Builder.CreateAdd(LHS, RHS), Builder.getTrue(), true);
   3771 
   3772     // Fall through uadd into sadd
   3773     LLVM_FALLTHROUGH;
   3774   }
   3775   case OCF_SIGNED_ADD: {
   3776     // X + 0 -> {X, false}
   3777     if (match(RHS, m_Zero()))
   3778       return SetResult(LHS, Builder.getFalse(), false);
   3779 
   3780     // We can strength reduce this signed add into a regular add if we can prove
   3781     // that it will never overflow.
   3782     if (OCF == OCF_SIGNED_ADD)
   3783       if (willNotOverflowSignedAdd(LHS, RHS, OrigI))
   3784         return SetResult(Builder.CreateNSWAdd(LHS, RHS), Builder.getFalse(),
   3785                          true);
   3786     break;
   3787   }
   3788 
   3789   case OCF_UNSIGNED_SUB:
   3790   case OCF_SIGNED_SUB: {
   3791     // X - 0 -> {X, false}
   3792     if (match(RHS, m_Zero()))
   3793       return SetResult(LHS, Builder.getFalse(), false);
   3794 
   3795     if (OCF == OCF_SIGNED_SUB) {
   3796       if (willNotOverflowSignedSub(LHS, RHS, OrigI))
   3797         return SetResult(Builder.CreateNSWSub(LHS, RHS), Builder.getFalse(),
   3798                          true);
   3799     } else {
   3800       if (willNotOverflowUnsignedSub(LHS, RHS, OrigI))
   3801         return SetResult(Builder.CreateNUWSub(LHS, RHS), Builder.getFalse(),
   3802                          true);
   3803     }
   3804     break;
   3805   }
   3806 
   3807   case OCF_UNSIGNED_MUL: {
   3808     OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI);
   3809     if (OR == OverflowResult::NeverOverflows)
   3810       return SetResult(Builder.CreateNUWMul(LHS, RHS), Builder.getFalse(),
   3811                        true);
   3812     if (OR == OverflowResult::AlwaysOverflows)
   3813       return SetResult(Builder.CreateMul(LHS, RHS), Builder.getTrue(), true);
   3814     LLVM_FALLTHROUGH;
   3815   }
   3816   case OCF_SIGNED_MUL:
   3817     // X * undef -> undef
   3818     if (isa<UndefValue>(RHS))
   3819       return SetResult(RHS, UndefValue::get(Builder.getInt1Ty()), false);
   3820 
   3821     // X * 0 -> {0, false}
   3822     if (match(RHS, m_Zero()))
   3823       return SetResult(RHS, Builder.getFalse(), false);
   3824 
   3825     // X * 1 -> {X, false}
   3826     if (match(RHS, m_One()))
   3827       return SetResult(LHS, Builder.getFalse(), false);
   3828 
   3829     if (OCF == OCF_SIGNED_MUL)
   3830       if (willNotOverflowSignedMul(LHS, RHS, OrigI))
   3831         return SetResult(Builder.CreateNSWMul(LHS, RHS), Builder.getFalse(),
   3832                          true);
   3833     break;
   3834   }
   3835 
   3836   return false;
   3837 }
   3838 
   3839 /// Recognize and process idiom involving test for multiplication
   3840 /// overflow.
   3841 ///
   3842 /// The caller has matched a pattern of the form:
   3843 ///   I = cmp u (mul(zext A, zext B), V
   3844 /// The function checks if this is a test for overflow and if so replaces
   3845 /// multiplication with call to 'mul.with.overflow' intrinsic.
   3846 ///
   3847 /// \param I Compare instruction.
   3848 /// \param MulVal Result of 'mult' instruction.  It is one of the arguments of
   3849 ///               the compare instruction.  Must be of integer type.
   3850 /// \param OtherVal The other argument of compare instruction.
   3851 /// \returns Instruction which must replace the compare instruction, NULL if no
   3852 ///          replacement required.
   3853 static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
   3854                                          Value *OtherVal, InstCombiner &IC) {
   3855   // Don't bother doing this transformation for pointers, don't do it for
   3856   // vectors.
   3857   if (!isa<IntegerType>(MulVal->getType()))
   3858     return nullptr;
   3859 
   3860   assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal);
   3861   assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal);
   3862   auto *MulInstr = dyn_cast<Instruction>(MulVal);
   3863   if (!MulInstr)
   3864     return nullptr;
   3865   assert(MulInstr->getOpcode() == Instruction::Mul);
   3866 
   3867   auto *LHS = cast<ZExtOperator>(MulInstr->getOperand(0)),
   3868        *RHS = cast<ZExtOperator>(MulInstr->getOperand(1));
   3869   assert(LHS->getOpcode() == Instruction::ZExt);
   3870   assert(RHS->getOpcode() == Instruction::ZExt);
   3871   Value *A = LHS->getOperand(0), *B = RHS->getOperand(0);
   3872 
   3873   // Calculate type and width of the result produced by mul.with.overflow.
   3874   Type *TyA = A->getType(), *TyB = B->getType();
   3875   unsigned WidthA = TyA->getPrimitiveSizeInBits(),
   3876            WidthB = TyB->getPrimitiveSizeInBits();
   3877   unsigned MulWidth;
   3878   Type *MulType;
   3879   if (WidthB > WidthA) {
   3880     MulWidth = WidthB;
   3881     MulType = TyB;
   3882   } else {
   3883     MulWidth = WidthA;
   3884     MulType = TyA;
   3885   }
   3886 
   3887   // In order to replace the original mul with a narrower mul.with.overflow,
   3888   // all uses must ignore upper bits of the product.  The number of used low
   3889   // bits must be not greater than the width of mul.with.overflow.
   3890   if (MulVal->hasNUsesOrMore(2))
   3891     for (User *U : MulVal->users()) {
   3892       if (U == &I)
   3893         continue;
   3894       if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
   3895         // Check if truncation ignores bits above MulWidth.
   3896         unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits();
   3897         if (TruncWidth > MulWidth)
   3898           return nullptr;
   3899       } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
   3900         // Check if AND ignores bits above MulWidth.
   3901         if (BO->getOpcode() != Instruction::And)
   3902           return nullptr;
   3903         if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
   3904           const APInt &CVal = CI->getValue();
   3905           if (CVal.getBitWidth() - CVal.countLeadingZeros() > MulWidth)
   3906             return nullptr;
   3907         } else {
   3908           // In this case we could have the operand of the binary operation
   3909           // being defined in another block, and performing the replacement
   3910           // could break the dominance relation.
   3911           return nullptr;
   3912         }
   3913       } else {
   3914         // Other uses prohibit this transformation.
   3915         return nullptr;
   3916       }
   3917     }
   3918 
   3919   // Recognize patterns
   3920   switch (I.getPredicate()) {
   3921   case ICmpInst::ICMP_EQ:
   3922   case ICmpInst::ICMP_NE:
   3923     // Recognize pattern:
   3924     //   mulval = mul(zext A, zext B)
   3925     //   cmp eq/neq mulval, zext trunc mulval
   3926     if (ZExtInst *Zext = dyn_cast<ZExtInst>(OtherVal))
   3927       if (Zext->hasOneUse()) {
   3928         Value *ZextArg = Zext->getOperand(0);
   3929         if (TruncInst *Trunc = dyn_cast<TruncInst>(ZextArg))
   3930           if (Trunc->getType()->getPrimitiveSizeInBits() == MulWidth)
   3931             break; //Recognized
   3932       }
   3933 
   3934     // Recognize pattern:
   3935     //   mulval = mul(zext A, zext B)
   3936     //   cmp eq/neq mulval, and(mulval, mask), mask selects low MulWidth bits.
   3937     ConstantInt *CI;
   3938     Value *ValToMask;
   3939     if (match(OtherVal, m_And(m_Value(ValToMask), m_ConstantInt(CI)))) {
   3940       if (ValToMask != MulVal)
   3941         return nullptr;
   3942       const APInt &CVal = CI->getValue() + 1;
   3943       if (CVal.isPowerOf2()) {
   3944         unsigned MaskWidth = CVal.logBase2();
   3945         if (MaskWidth == MulWidth)
   3946           break; // Recognized
   3947       }
   3948     }
   3949     return nullptr;
   3950 
   3951   case ICmpInst::ICMP_UGT:
   3952     // Recognize pattern:
   3953     //   mulval = mul(zext A, zext B)
   3954     //   cmp ugt mulval, max
   3955     if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
   3956       APInt MaxVal = APInt::getMaxValue(MulWidth);
   3957       MaxVal = MaxVal.zext(CI->getBitWidth());
   3958       if (MaxVal.eq(CI->getValue()))
   3959         break; // Recognized
   3960     }
   3961     return nullptr;
   3962 
   3963   case ICmpInst::ICMP_UGE:
   3964     // Recognize pattern:
   3965     //   mulval = mul(zext A, zext B)
   3966     //   cmp uge mulval, max+1
   3967     if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
   3968       APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
   3969       if (MaxVal.eq(CI->getValue()))
   3970         break; // Recognized
   3971     }
   3972     return nullptr;
   3973 
   3974   case ICmpInst::ICMP_ULE:
   3975     // Recognize pattern:
   3976     //   mulval = mul(zext A, zext B)
   3977     //   cmp ule mulval, max
   3978     if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
   3979       APInt MaxVal = APInt::getMaxValue(MulWidth);
   3980       MaxVal = MaxVal.zext(CI->getBitWidth());
   3981       if (MaxVal.eq(CI->getValue()))
   3982         break; // Recognized
   3983     }
   3984     return nullptr;
   3985 
   3986   case ICmpInst::ICMP_ULT:
   3987     // Recognize pattern:
   3988     //   mulval = mul(zext A, zext B)
   3989     //   cmp ule mulval, max + 1
   3990     if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
   3991       APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
   3992       if (MaxVal.eq(CI->getValue()))
   3993         break; // Recognized
   3994     }
   3995     return nullptr;
   3996 
   3997   default:
   3998     return nullptr;
   3999   }
   4000 
   4001   InstCombiner::BuilderTy &Builder = IC.Builder;
   4002   Builder.SetInsertPoint(MulInstr);
   4003 
   4004   // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B)
   4005   Value *MulA = A, *MulB = B;
   4006   if (WidthA < MulWidth)
   4007     MulA = Builder.CreateZExt(A, MulType);
   4008   if (WidthB < MulWidth)
   4009     MulB = Builder.CreateZExt(B, MulType);
   4010   Value *F = Intrinsic::getDeclaration(I.getModule(),
   4011                                        Intrinsic::umul_with_overflow, MulType);
   4012   CallInst *Call = Builder.CreateCall(F, {MulA, MulB}, "umul");
   4013   IC.Worklist.Add(MulInstr);
   4014 
   4015   // If there are uses of mul result other than the comparison, we know that
   4016   // they are truncation or binary AND. Change them to use result of
   4017   // mul.with.overflow and adjust properly mask/size.
   4018   if (MulVal->hasNUsesOrMore(2)) {
   4019     Value *Mul = Builder.CreateExtractValue(Call, 0, "umul.value");
   4020     for (auto UI = MulVal->user_begin(), UE = MulVal->user_end(); UI != UE;) {
   4021       User *U = *UI++;
   4022       if (U == &I || U == OtherVal)
   4023         continue;
   4024       if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
   4025         if (TI->getType()->getPrimitiveSizeInBits() == MulWidth)
   4026           IC.replaceInstUsesWith(*TI, Mul);
   4027         else
   4028           TI->setOperand(0, Mul);
   4029       } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
   4030         assert(BO->getOpcode() == Instruction::And);
   4031         // Replace (mul & mask) --> zext (mul.with.overflow & short_mask)
   4032         ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
   4033         APInt ShortMask = CI->getValue().trunc(MulWidth);
   4034         Value *ShortAnd = Builder.CreateAnd(Mul, ShortMask);
   4035         Instruction *Zext =
   4036             cast<Instruction>(Builder.CreateZExt(ShortAnd, BO->getType()));
   4037         IC.Worklist.Add(Zext);
   4038         IC.replaceInstUsesWith(*BO, Zext);
   4039       } else {
   4040         llvm_unreachable("Unexpected Binary operation");
   4041       }
   4042       IC.Worklist.Add(cast<Instruction>(U));
   4043     }
   4044   }
   4045   if (isa<Instruction>(OtherVal))
   4046     IC.Worklist.Add(cast<Instruction>(OtherVal));
   4047 
   4048   // The original icmp gets replaced with the overflow value, maybe inverted
   4049   // depending on predicate.
   4050   bool Inverse = false;
   4051   switch (I.getPredicate()) {
   4052   case ICmpInst::ICMP_NE:
   4053     break;
   4054   case ICmpInst::ICMP_EQ:
   4055     Inverse = true;
   4056     break;
   4057   case ICmpInst::ICMP_UGT:
   4058   case ICmpInst::ICMP_UGE:
   4059     if (I.getOperand(0) == MulVal)
   4060       break;
   4061     Inverse = true;
   4062     break;
   4063   case ICmpInst::ICMP_ULT:
   4064   case ICmpInst::ICMP_ULE:
   4065     if (I.getOperand(1) == MulVal)
   4066       break;
   4067     Inverse = true;
   4068     break;
   4069   default:
   4070     llvm_unreachable("Unexpected predicate");
   4071   }
   4072   if (Inverse) {
   4073     Value *Res = Builder.CreateExtractValue(Call, 1);
   4074     return BinaryOperator::CreateNot(Res);
   4075   }
   4076 
   4077   return ExtractValueInst::Create(Call, 1);
   4078 }
   4079 
   4080 /// When performing a comparison against a constant, it is possible that not all
   4081 /// the bits in the LHS are demanded. This helper method computes the mask that
   4082 /// IS demanded.
   4083 static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth) {
   4084   const APInt *RHS;
   4085   if (!match(I.getOperand(1), m_APInt(RHS)))
   4086     return APInt::getAllOnesValue(BitWidth);
   4087 
   4088   // If this is a normal comparison, it demands all bits. If it is a sign bit
   4089   // comparison, it only demands the sign bit.
   4090   bool UnusedBit;
   4091   if (isSignBitCheck(I.getPredicate(), *RHS, UnusedBit))
   4092     return APInt::getSignMask(BitWidth);
   4093 
   4094   switch (I.getPredicate()) {
   4095   // For a UGT comparison, we don't care about any bits that
   4096   // correspond to the trailing ones of the comparand.  The value of these
   4097   // bits doesn't impact the outcome of the comparison, because any value
   4098   // greater than the RHS must differ in a bit higher than these due to carry.
   4099   case ICmpInst::ICMP_UGT:
   4100     return APInt::getBitsSetFrom(BitWidth, RHS->countTrailingOnes());
   4101 
   4102   // Similarly, for a ULT comparison, we don't care about the trailing zeros.
   4103   // Any value less than the RHS must differ in a higher bit because of carries.
   4104   case ICmpInst::ICMP_ULT:
   4105     return APInt::getBitsSetFrom(BitWidth, RHS->countTrailingZeros());
   4106 
   4107   default:
   4108     return APInt::getAllOnesValue(BitWidth);
   4109   }
   4110 }
   4111 
   4112 /// Check if the order of \p Op0 and \p Op1 as operands in an ICmpInst
   4113 /// should be swapped.
   4114 /// The decision is based on how many times these two operands are reused
   4115 /// as subtract operands and their positions in those instructions.
   4116 /// The rationale is that several architectures use the same instruction for
   4117 /// both subtract and cmp. Thus, it is better if the order of those operands
   4118 /// match.
   4119 /// \return true if Op0 and Op1 should be swapped.
   4120 static bool swapMayExposeCSEOpportunities(const Value *Op0, const Value *Op1) {
   4121   // Filter out pointer values as those cannot appear directly in subtract.
   4122   // FIXME: we may want to go through inttoptrs or bitcasts.
   4123   if (Op0->getType()->isPointerTy())
   4124     return false;
   4125   // If a subtract already has the same operands as a compare, swapping would be
   4126   // bad. If a subtract has the same operands as a compare but in reverse order,
   4127   // then swapping is good.
   4128   int GoodToSwap = 0;
   4129   for (const User *U : Op0->users()) {
   4130     if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
   4131       GoodToSwap++;
   4132     else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
   4133       GoodToSwap--;
   4134   }
   4135   return GoodToSwap > 0;
   4136 }
   4137 
   4138 /// Check that one use is in the same block as the definition and all
   4139 /// other uses are in blocks dominated by a given block.
   4140 ///
   4141 /// \param DI Definition
   4142 /// \param UI Use
   4143 /// \param DB Block that must dominate all uses of \p DI outside
   4144 ///           the parent block
   4145 /// \return true when \p UI is the only use of \p DI in the parent block
   4146 /// and all other uses of \p DI are in blocks dominated by \p DB.
   4147 ///
   4148 bool InstCombiner::dominatesAllUses(const Instruction *DI,
   4149                                     const Instruction *UI,
   4150                                     const BasicBlock *DB) const {
   4151   assert(DI && UI && "Instruction not defined\n");
   4152   // Ignore incomplete definitions.
   4153   if (!DI->getParent())
   4154     return false;
   4155   // DI and UI must be in the same block.
   4156   if (DI->getParent() != UI->getParent())
   4157     return false;
   4158   // Protect from self-referencing blocks.
   4159   if (DI->getParent() == DB)
   4160     return false;
   4161   for (const User *U : DI->users()) {
   4162     auto *Usr = cast<Instruction>(U);
   4163     if (Usr != UI && !DT.dominates(DB, Usr->getParent()))
   4164       return false;
   4165   }
   4166   return true;
   4167 }
   4168 
   4169 /// Return true when the instruction sequence within a block is select-cmp-br.
   4170 static bool isChainSelectCmpBranch(const SelectInst *SI) {
   4171   const BasicBlock *BB = SI->getParent();
   4172   if (!BB)
   4173     return false;
   4174   auto *BI = dyn_cast_or_null<BranchInst>(BB->getTerminator());
   4175   if (!BI || BI->getNumSuccessors() != 2)
   4176     return false;
   4177   auto *IC = dyn_cast<ICmpInst>(BI->getCondition());
   4178   if (!IC || (IC->getOperand(0) != SI && IC->getOperand(1) != SI))
   4179     return false;
   4180   return true;
   4181 }
   4182 
   4183 /// True when a select result is replaced by one of its operands
   4184 /// in select-icmp sequence. This will eventually result in the elimination
   4185 /// of the select.
   4186 ///
   4187 /// \param SI    Select instruction
   4188 /// \param Icmp  Compare instruction
   4189 /// \param SIOpd Operand that replaces the select
   4190 ///
   4191 /// Notes:
   4192 /// - The replacement is global and requires dominator information
   4193 /// - The caller is responsible for the actual replacement
   4194 ///
   4195 /// Example:
   4196 ///
   4197 /// entry:
   4198 ///  %4 = select i1 %3, %C* %0, %C* null
   4199 ///  %5 = icmp eq %C* %4, null
   4200 ///  br i1 %5, label %9, label %7
   4201 ///  ...
   4202 ///  ; <label>:7                                       ; preds = %entry
   4203 ///  %8 = getelementptr inbounds %C* %4, i64 0, i32 0
   4204 ///  ...
   4205 ///
   4206 /// can be transformed to
   4207 ///
   4208 ///  %5 = icmp eq %C* %0, null
   4209 ///  %6 = select i1 %3, i1 %5, i1 true
   4210 ///  br i1 %6, label %9, label %7
   4211 ///  ...
   4212 ///  ; <label>:7                                       ; preds = %entry
   4213 ///  %8 = getelementptr inbounds %C* %0, i64 0, i32 0  // replace by %0!
   4214 ///
   4215 /// Similar when the first operand of the select is a constant or/and
   4216 /// the compare is for not equal rather than equal.
   4217 ///
   4218 /// NOTE: The function is only called when the select and compare constants
   4219 /// are equal, the optimization can work only for EQ predicates. This is not a
   4220 /// major restriction since a NE compare should be 'normalized' to an equal
   4221 /// compare, which usually happens in the combiner and test case
   4222 /// select-cmp-br.ll checks for it.
   4223 bool InstCombiner::replacedSelectWithOperand(SelectInst *SI,
   4224                                              const ICmpInst *Icmp,
   4225                                              const unsigned SIOpd) {
   4226   assert((SIOpd == 1 || SIOpd == 2) && "Invalid select operand!");
   4227   if (isChainSelectCmpBranch(SI) && Icmp->getPredicate() == ICmpInst::ICMP_EQ) {
   4228     BasicBlock *Succ = SI->getParent()->getTerminator()->getSuccessor(1);
   4229     // The check for the single predecessor is not the best that can be
   4230     // done. But it protects efficiently against cases like when SI's
   4231     // home block has two successors, Succ and Succ1, and Succ1 predecessor
   4232     // of Succ. Then SI can't be replaced by SIOpd because the use that gets
   4233     // replaced can be reached on either path. So the uniqueness check
   4234     // guarantees that the path all uses of SI (outside SI's parent) are on
   4235     // is disjoint from all other paths out of SI. But that information
   4236     // is more expensive to compute, and the trade-off here is in favor
   4237     // of compile-time. It should also be noticed that we check for a single
   4238     // predecessor and not only uniqueness. This to handle the situation when
   4239     // Succ and Succ1 points to the same basic block.
   4240     if (Succ->getSinglePredecessor() && dominatesAllUses(SI, Icmp, Succ)) {
   4241       NumSel++;
   4242       SI->replaceUsesOutsideBlock(SI->getOperand(SIOpd), SI->getParent());
   4243       return true;
   4244     }
   4245   }
   4246   return false;
   4247 }
   4248 
   4249 /// Try to fold the comparison based on range information we can get by checking
   4250 /// whether bits are known to be zero or one in the inputs.
   4251 Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
   4252   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   4253   Type *Ty = Op0->getType();
   4254   ICmpInst::Predicate Pred = I.getPredicate();
   4255 
   4256   // Get scalar or pointer size.
   4257   unsigned BitWidth = Ty->isIntOrIntVectorTy()
   4258                           ? Ty->getScalarSizeInBits()
   4259                           : DL.getIndexTypeSizeInBits(Ty->getScalarType());
   4260 
   4261   if (!BitWidth)
   4262     return nullptr;
   4263 
   4264   KnownBits Op0Known(BitWidth);
   4265   KnownBits Op1Known(BitWidth);
   4266 
   4267   if (SimplifyDemandedBits(&I, 0,
   4268                            getDemandedBitsLHSMask(I, BitWidth),
   4269                            Op0Known, 0))
   4270     return &I;
   4271 
   4272   if (SimplifyDemandedBits(&I, 1, APInt::getAllOnesValue(BitWidth),
   4273                            Op1Known, 0))
   4274     return &I;
   4275 
   4276   // Given the known and unknown bits, compute a range that the LHS could be
   4277   // in.  Compute the Min, Max and RHS values based on the known bits. For the
   4278   // EQ and NE we use unsigned values.
   4279   APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
   4280   APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
   4281   if (I.isSigned()) {
   4282     computeSignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max);
   4283     computeSignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max);
   4284   } else {
   4285     computeUnsignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max);
   4286     computeUnsignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max);
   4287   }
   4288 
   4289   // If Min and Max are known to be the same, then SimplifyDemandedBits figured
   4290   // out that the LHS or RHS is a constant. Constant fold this now, so that
   4291   // code below can assume that Min != Max.
   4292   if (!isa<Constant>(Op0) && Op0Min == Op0Max)
   4293     return new ICmpInst(Pred, ConstantExpr::getIntegerValue(Ty, Op0Min), Op1);
   4294   if (!isa<Constant>(Op1) && Op1Min == Op1Max)
   4295     return new ICmpInst(Pred, Op0, ConstantExpr::getIntegerValue(Ty, Op1Min));
   4296 
   4297   // Based on the range information we know about the LHS, see if we can
   4298   // simplify this comparison.  For example, (x&4) < 8 is always true.
   4299   switch (Pred) {
   4300   default:
   4301     llvm_unreachable("Unknown icmp opcode!");
   4302   case ICmpInst::ICMP_EQ:
   4303   case ICmpInst::ICMP_NE: {
   4304     if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) {
   4305       return Pred == CmpInst::ICMP_EQ
   4306                  ? replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()))
   4307                  : replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
   4308     }
   4309 
   4310     // If all bits are known zero except for one, then we know at most one bit
   4311     // is set. If the comparison is against zero, then this is a check to see if
   4312     // *that* bit is set.
   4313     APInt Op0KnownZeroInverted = ~Op0Known.Zero;
   4314     if (Op1Known.isZero()) {
   4315       // If the LHS is an AND with the same constant, look through it.
   4316       Value *LHS = nullptr;
   4317       const APInt *LHSC;
   4318       if (!match(Op0, m_And(m_Value(LHS), m_APInt(LHSC))) ||
   4319           *LHSC != Op0KnownZeroInverted)
   4320         LHS = Op0;
   4321 
   4322       Value *X;
   4323       if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
   4324         APInt ValToCheck = Op0KnownZeroInverted;
   4325         Type *XTy = X->getType();
   4326         if (ValToCheck.isPowerOf2()) {
   4327           // ((1 << X) & 8) == 0 -> X != 3
   4328           // ((1 << X) & 8) != 0 -> X == 3
   4329           auto *CmpC = ConstantInt::get(XTy, ValToCheck.countTrailingZeros());
   4330           auto NewPred = ICmpInst::getInversePredicate(Pred);
   4331           return new ICmpInst(NewPred, X, CmpC);
   4332         } else if ((++ValToCheck).isPowerOf2()) {
   4333           // ((1 << X) & 7) == 0 -> X >= 3
   4334           // ((1 << X) & 7) != 0 -> X  < 3
   4335           auto *CmpC = ConstantInt::get(XTy, ValToCheck.countTrailingZeros());
   4336           auto NewPred =
   4337               Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGE : CmpInst::ICMP_ULT;
   4338           return new ICmpInst(NewPred, X, CmpC);
   4339         }
   4340       }
   4341 
   4342       // Check if the LHS is 8 >>u x and the result is a power of 2 like 1.
   4343       const APInt *CI;
   4344       if (Op0KnownZeroInverted.isOneValue() &&
   4345           match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) {
   4346         // ((8 >>u X) & 1) == 0 -> X != 3
   4347         // ((8 >>u X) & 1) != 0 -> X == 3
   4348         unsigned CmpVal = CI->countTrailingZeros();
   4349         auto NewPred = ICmpInst::getInversePredicate(Pred);
   4350         return new ICmpInst(NewPred, X, ConstantInt::get(X->getType(), CmpVal));
   4351       }
   4352     }
   4353     break;
   4354   }
   4355   case ICmpInst::ICMP_ULT: {
   4356     if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
   4357       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
   4358     if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
   4359       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
   4360     if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
   4361       return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
   4362 
   4363     const APInt *CmpC;
   4364     if (match(Op1, m_APInt(CmpC))) {
   4365       // A <u C -> A == C-1 if min(A)+1 == C
   4366       if (*CmpC == Op0Min + 1)
   4367         return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
   4368                             ConstantInt::get(Op1->getType(), *CmpC - 1));
   4369       // X <u C --> X == 0, if the number of zero bits in the bottom of X
   4370       // exceeds the log2 of C.
   4371       if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2())
   4372         return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
   4373                             Constant::getNullValue(Op1->getType()));
   4374     }
   4375     break;
   4376   }
   4377   case ICmpInst::ICMP_UGT: {
   4378     if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B)
   4379       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
   4380     if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B)
   4381       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
   4382     if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
   4383       return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
   4384 
   4385     const APInt *CmpC;
   4386     if (match(Op1, m_APInt(CmpC))) {
   4387       // A >u C -> A == C+1 if max(a)-1 == C
   4388       if (*CmpC == Op0Max - 1)
   4389         return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
   4390                             ConstantInt::get(Op1->getType(), *CmpC + 1));
   4391       // X >u C --> X != 0, if the number of zero bits in the bottom of X
   4392       // exceeds the log2 of C.
   4393       if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits())
   4394         return new ICmpInst(ICmpInst::ICMP_NE, Op0,
   4395                             Constant::getNullValue(Op1->getType()));
   4396     }
   4397     break;
   4398   }
   4399   case ICmpInst::ICMP_SLT: {
   4400     if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(C)
   4401       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
   4402     if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C)
   4403       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
   4404     if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
   4405       return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
   4406     const APInt *CmpC;
   4407     if (match(Op1, m_APInt(CmpC))) {
   4408       if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C
   4409         return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
   4410                             ConstantInt::get(Op1->getType(), *CmpC - 1));
   4411     }
   4412     break;
   4413   }
   4414   case ICmpInst::ICMP_SGT: {
   4415     if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B)
   4416       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
   4417     if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
   4418       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
   4419     if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
   4420       return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
   4421     const APInt *CmpC;
   4422     if (match(Op1, m_APInt(CmpC))) {
   4423       if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C
   4424         return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
   4425                             ConstantInt::get(Op1->getType(), *CmpC + 1));
   4426     }
   4427     break;
   4428   }
   4429   case ICmpInst::ICMP_SGE:
   4430     assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
   4431     if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B)
   4432       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
   4433     if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B)
   4434       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
   4435     if (Op1Min == Op0Max) // A >=s B -> A == B if max(A) == min(B)
   4436       return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
   4437     break;
   4438   case ICmpInst::ICMP_SLE:
   4439     assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
   4440     if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B)
   4441       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
   4442     if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B)
   4443       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
   4444     if (Op1Max == Op0Min) // A <=s B -> A == B if min(A) == max(B)
   4445       return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
   4446     break;
   4447   case ICmpInst::ICMP_UGE:
   4448     assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
   4449     if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B)
   4450       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
   4451     if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B)
   4452       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
   4453     if (Op1Min == Op0Max) // A >=u B -> A == B if max(A) == min(B)
   4454       return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
   4455     break;
   4456   case ICmpInst::ICMP_ULE:
   4457     assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
   4458     if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B)
   4459       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
   4460     if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B)
   4461       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
   4462     if (Op1Max == Op0Min) // A <=u B -> A == B if min(A) == max(B)
   4463       return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
   4464     break;
   4465   }
   4466 
   4467   // Turn a signed comparison into an unsigned one if both operands are known to
   4468   // have the same sign.
   4469   if (I.isSigned() &&
   4470       ((Op0Known.Zero.isNegative() && Op1Known.Zero.isNegative()) ||
   4471        (Op0Known.One.isNegative() && Op1Known.One.isNegative())))
   4472     return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
   4473 
   4474   return nullptr;
   4475 }
   4476 
   4477 /// If we have an icmp le or icmp ge instruction with a constant operand, turn
   4478 /// it into the appropriate icmp lt or icmp gt instruction. This transform
   4479 /// allows them to be folded in visitICmpInst.
   4480 static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) {
   4481   ICmpInst::Predicate Pred = I.getPredicate();
   4482   if (Pred != ICmpInst::ICMP_SLE && Pred != ICmpInst::ICMP_SGE &&
   4483       Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_UGE)
   4484     return nullptr;
   4485 
   4486   Value *Op0 = I.getOperand(0);
   4487   Value *Op1 = I.getOperand(1);
   4488   auto *Op1C = dyn_cast<Constant>(Op1);
   4489   if (!Op1C)
   4490     return nullptr;
   4491 
   4492   // Check if the constant operand can be safely incremented/decremented without
   4493   // overflowing/underflowing. For scalars, SimplifyICmpInst has already handled
   4494   // the edge cases for us, so we just assert on them. For vectors, we must
   4495   // handle the edge cases.
   4496   Type *Op1Type = Op1->getType();
   4497   bool IsSigned = I.isSigned();
   4498   bool IsLE = (Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_ULE);
   4499   auto *CI = dyn_cast<ConstantInt>(Op1C);
   4500   if (CI) {
   4501     // A <= MAX -> TRUE ; A >= MIN -> TRUE
   4502     assert(IsLE ? !CI->isMaxValue(IsSigned) : !CI->isMinValue(IsSigned));
   4503   } else if (Op1Type->isVectorTy()) {
   4504     // TODO? If the edge cases for vectors were guaranteed to be handled as they
   4505     // are for scalar, we could remove the min/max checks. However, to do that,
   4506     // we would have to use insertelement/shufflevector to replace edge values.
   4507     unsigned NumElts = Op1Type->getVectorNumElements();
   4508     for (unsigned i = 0; i != NumElts; ++i) {
   4509       Constant *Elt = Op1C->getAggregateElement(i);
   4510       if (!Elt)
   4511         return nullptr;
   4512 
   4513       if (isa<UndefValue>(Elt))
   4514         continue;
   4515 
   4516       // Bail out if we can't determine if this constant is min/max or if we
   4517       // know that this constant is min/max.
   4518       auto *CI = dyn_cast<ConstantInt>(Elt);
   4519       if (!CI || (IsLE ? CI->isMaxValue(IsSigned) : CI->isMinValue(IsSigned)))
   4520         return nullptr;
   4521     }
   4522   } else {
   4523     // ConstantExpr?
   4524     return nullptr;
   4525   }
   4526 
   4527   // Increment or decrement the constant and set the new comparison predicate:
   4528   // ULE -> ULT ; UGE -> UGT ; SLE -> SLT ; SGE -> SGT
   4529   Constant *OneOrNegOne = ConstantInt::get(Op1Type, IsLE ? 1 : -1, true);
   4530   CmpInst::Predicate NewPred = IsLE ? ICmpInst::ICMP_ULT: ICmpInst::ICMP_UGT;
   4531   NewPred = IsSigned ? ICmpInst::getSignedPredicate(NewPred) : NewPred;
   4532   return new ICmpInst(NewPred, Op0, ConstantExpr::getAdd(Op1C, OneOrNegOne));
   4533 }
   4534 
   4535 /// Integer compare with boolean values can always be turned into bitwise ops.
   4536 static Instruction *canonicalizeICmpBool(ICmpInst &I,
   4537                                          InstCombiner::BuilderTy &Builder) {
   4538   Value *A = I.getOperand(0), *B = I.getOperand(1);
   4539   assert(A->getType()->isIntOrIntVectorTy(1) && "Bools only");
   4540 
   4541   // A boolean compared to true/false can be simplified to Op0/true/false in
   4542   // 14 out of the 20 (10 predicates * 2 constants) possible combinations.
   4543   // Cases not handled by InstSimplify are always 'not' of Op0.
   4544   if (match(B, m_Zero())) {
   4545     switch (I.getPredicate()) {
   4546       case CmpInst::ICMP_EQ:  // A ==   0 -> !A
   4547       case CmpInst::ICMP_ULE: // A <=u  0 -> !A
   4548       case CmpInst::ICMP_SGE: // A >=s  0 -> !A
   4549         return BinaryOperator::CreateNot(A);
   4550       default:
   4551         llvm_unreachable("ICmp i1 X, C not simplified as expected.");
   4552     }
   4553   } else if (match(B, m_One())) {
   4554     switch (I.getPredicate()) {
   4555       case CmpInst::ICMP_NE:  // A !=  1 -> !A
   4556       case CmpInst::ICMP_ULT: // A <u  1 -> !A
   4557       case CmpInst::ICMP_SGT: // A >s -1 -> !A
   4558         return BinaryOperator::CreateNot(A);
   4559       default:
   4560         llvm_unreachable("ICmp i1 X, C not simplified as expected.");
   4561     }
   4562   }
   4563 
   4564   switch (I.getPredicate()) {
   4565   default:
   4566     llvm_unreachable("Invalid icmp instruction!");
   4567   case ICmpInst::ICMP_EQ:
   4568     // icmp eq i1 A, B -> ~(A ^ B)
   4569     return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
   4570 
   4571   case ICmpInst::ICMP_NE:
   4572     // icmp ne i1 A, B -> A ^ B
   4573     return BinaryOperator::CreateXor(A, B);
   4574 
   4575   case ICmpInst::ICMP_UGT:
   4576     // icmp ugt -> icmp ult
   4577     std::swap(A, B);
   4578     LLVM_FALLTHROUGH;
   4579   case ICmpInst::ICMP_ULT:
   4580     // icmp ult i1 A, B -> ~A & B
   4581     return BinaryOperator::CreateAnd(Builder.CreateNot(A), B);
   4582 
   4583   case ICmpInst::ICMP_SGT:
   4584     // icmp sgt -> icmp slt
   4585     std::swap(A, B);
   4586     LLVM_FALLTHROUGH;
   4587   case ICmpInst::ICMP_SLT:
   4588     // icmp slt i1 A, B -> A & ~B
   4589     return BinaryOperator::CreateAnd(Builder.CreateNot(B), A);
   4590 
   4591   case ICmpInst::ICMP_UGE:
   4592     // icmp uge -> icmp ule
   4593     std::swap(A, B);
   4594     LLVM_FALLTHROUGH;
   4595   case ICmpInst::ICMP_ULE:
   4596     // icmp ule i1 A, B -> ~A | B
   4597     return BinaryOperator::CreateOr(Builder.CreateNot(A), B);
   4598 
   4599   case ICmpInst::ICMP_SGE:
   4600     // icmp sge -> icmp sle
   4601     std::swap(A, B);
   4602     LLVM_FALLTHROUGH;
   4603   case ICmpInst::ICMP_SLE:
   4604     // icmp sle i1 A, B -> A | ~B
   4605     return BinaryOperator::CreateOr(Builder.CreateNot(B), A);
   4606   }
   4607 }
   4608 
   4609 Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
   4610   bool Changed = false;
   4611   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   4612   unsigned Op0Cplxity = getComplexity(Op0);
   4613   unsigned Op1Cplxity = getComplexity(Op1);
   4614 
   4615   /// Orders the operands of the compare so that they are listed from most
   4616   /// complex to least complex.  This puts constants before unary operators,
   4617   /// before binary operators.
   4618   if (Op0Cplxity < Op1Cplxity ||
   4619       (Op0Cplxity == Op1Cplxity && swapMayExposeCSEOpportunities(Op0, Op1))) {
   4620     I.swapOperands();
   4621     std::swap(Op0, Op1);
   4622     Changed = true;
   4623   }
   4624 
   4625   if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1,
   4626                                   SQ.getWithInstruction(&I)))
   4627     return replaceInstUsesWith(I, V);
   4628 
   4629   // Comparing -val or val with non-zero is the same as just comparing val
   4630   // ie, abs(val) != 0 -> val != 0
   4631   if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero())) {
   4632     Value *Cond, *SelectTrue, *SelectFalse;
   4633     if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue),
   4634                             m_Value(SelectFalse)))) {
   4635       if (Value *V = dyn_castNegVal(SelectTrue)) {
   4636         if (V == SelectFalse)
   4637           return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
   4638       }
   4639       else if (Value *V = dyn_castNegVal(SelectFalse)) {
   4640         if (V == SelectTrue)
   4641           return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
   4642       }
   4643     }
   4644   }
   4645 
   4646   if (Op0->getType()->isIntOrIntVectorTy(1))
   4647     if (Instruction *Res = canonicalizeICmpBool(I, Builder))
   4648       return Res;
   4649 
   4650   if (ICmpInst *NewICmp = canonicalizeCmpWithConstant(I))
   4651     return NewICmp;
   4652 
   4653   if (Instruction *Res = foldICmpWithConstant(I))
   4654     return Res;
   4655 
   4656   if (Instruction *Res = foldICmpUsingKnownBits(I))
   4657     return Res;
   4658 
   4659   // Test if the ICmpInst instruction is used exclusively by a select as
   4660   // part of a minimum or maximum operation. If so, refrain from doing
   4661   // any other folding. This helps out other analyses which understand
   4662   // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
   4663   // and CodeGen. And in this case, at least one of the comparison
   4664   // operands has at least one user besides the compare (the select),
   4665   // which would often largely negate the benefit of folding anyway.
   4666   //
   4667   // Do the same for the other patterns recognized by matchSelectPattern.
   4668   if (I.hasOneUse())
   4669     if (SelectInst *SI = dyn_cast<SelectInst>(I.user_back())) {
   4670       Value *A, *B;
   4671       SelectPatternResult SPR = matchSelectPattern(SI, A, B);
   4672       if (SPR.Flavor != SPF_UNKNOWN)
   4673         return nullptr;
   4674     }
   4675 
   4676   // Do this after checking for min/max to prevent infinite looping.
   4677   if (Instruction *Res = foldICmpWithZero(I))
   4678     return Res;
   4679 
   4680   // FIXME: We only do this after checking for min/max to prevent infinite
   4681   // looping caused by a reverse canonicalization of these patterns for min/max.
   4682   // FIXME: The organization of folds is a mess. These would naturally go into
   4683   // canonicalizeCmpWithConstant(), but we can't move all of the above folds
   4684   // down here after the min/max restriction.
   4685   ICmpInst::Predicate Pred = I.getPredicate();
   4686   const APInt *C;
   4687   if (match(Op1, m_APInt(C))) {
   4688     // For i32: x >u 2147483647 -> x <s 0  -> true if sign bit set
   4689     if (Pred == ICmpInst::ICMP_UGT && C->isMaxSignedValue()) {
   4690       Constant *Zero = Constant::getNullValue(Op0->getType());
   4691       return new ICmpInst(ICmpInst::ICMP_SLT, Op0, Zero);
   4692     }
   4693 
   4694     // For i32: x <u 2147483648 -> x >s -1  -> true if sign bit clear
   4695     if (Pred == ICmpInst::ICMP_ULT && C->isMinSignedValue()) {
   4696       Constant *AllOnes = Constant::getAllOnesValue(Op0->getType());
   4697       return new ICmpInst(ICmpInst::ICMP_SGT, Op0, AllOnes);
   4698     }
   4699   }
   4700 
   4701   if (Instruction *Res = foldICmpInstWithConstant(I))
   4702     return Res;
   4703 
   4704   if (Instruction *Res = foldICmpInstWithConstantNotInt(I))
   4705     return Res;
   4706 
   4707   // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
   4708   if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))
   4709     if (Instruction *NI = foldGEPICmp(GEP, Op1, I.getPredicate(), I))
   4710       return NI;
   4711   if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1))
   4712     if (Instruction *NI = foldGEPICmp(GEP, Op0,
   4713                            ICmpInst::getSwappedPredicate(I.getPredicate()), I))
   4714       return NI;
   4715 
   4716   // Try to optimize equality comparisons against alloca-based pointers.
   4717   if (Op0->getType()->isPointerTy() && I.isEquality()) {
   4718     assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?");
   4719     if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op0, DL)))
   4720       if (Instruction *New = foldAllocaCmp(I, Alloca, Op1))
   4721         return New;
   4722     if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op1, DL)))
   4723       if (Instruction *New = foldAllocaCmp(I, Alloca, Op0))
   4724         return New;
   4725   }
   4726 
   4727   // Zero-equality and sign-bit checks are preserved through sitofp + bitcast.
   4728   Value *X;
   4729   if (match(Op0, m_BitCast(m_SIToFP(m_Value(X))))) {
   4730     // icmp  eq (bitcast (sitofp X)), 0 --> icmp  eq X, 0
   4731     // icmp  ne (bitcast (sitofp X)), 0 --> icmp  ne X, 0
   4732     // icmp slt (bitcast (sitofp X)), 0 --> icmp slt X, 0
   4733     // icmp sgt (bitcast (sitofp X)), 0 --> icmp sgt X, 0
   4734     if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_SLT ||
   4735          Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT) &&
   4736         match(Op1, m_Zero()))
   4737       return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
   4738 
   4739     // icmp slt (bitcast (sitofp X)), 1 --> icmp slt X, 1
   4740     if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_One()))
   4741       return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), 1));
   4742 
   4743     // icmp sgt (bitcast (sitofp X)), -1 --> icmp sgt X, -1
   4744     if (Pred == ICmpInst::ICMP_SGT && match(Op1, m_AllOnes()))
   4745       return new ICmpInst(Pred, X, ConstantInt::getAllOnesValue(X->getType()));
   4746   }
   4747 
   4748   // Zero-equality checks are preserved through unsigned floating-point casts:
   4749   // icmp eq (bitcast (uitofp X)), 0 --> icmp eq X, 0
   4750   // icmp ne (bitcast (uitofp X)), 0 --> icmp ne X, 0
   4751   if (match(Op0, m_BitCast(m_UIToFP(m_Value(X)))))
   4752     if (I.isEquality() && match(Op1, m_Zero()))
   4753       return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
   4754 
   4755   // Test to see if the operands of the icmp are casted versions of other
   4756   // values.  If the ptr->ptr cast can be stripped off both arguments, we do so
   4757   // now.
   4758   if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
   4759     if (Op0->getType()->isPointerTy() &&
   4760         (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
   4761       // We keep moving the cast from the left operand over to the right
   4762       // operand, where it can often be eliminated completely.
   4763       Op0 = CI->getOperand(0);
   4764 
   4765       // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast
   4766       // so eliminate it as well.
   4767       if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1))
   4768         Op1 = CI2->getOperand(0);
   4769 
   4770       // If Op1 is a constant, we can fold the cast into the constant.
   4771       if (Op0->getType() != Op1->getType()) {
   4772         if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
   4773           Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
   4774         } else {
   4775           // Otherwise, cast the RHS right before the icmp
   4776           Op1 = Builder.CreateBitCast(Op1, Op0->getType());
   4777         }
   4778       }
   4779       return new ICmpInst(I.getPredicate(), Op0, Op1);
   4780     }
   4781   }
   4782 
   4783   if (isa<CastInst>(Op0)) {
   4784     // Handle the special case of: icmp (cast bool to X), <cst>
   4785     // This comes up when you have code like
   4786     //   int X = A < B;
   4787     //   if (X) ...
   4788     // For generality, we handle any zero-extension of any operand comparison
   4789     // with a constant or another cast from the same type.
   4790     if (isa<Constant>(Op1) || isa<CastInst>(Op1))
   4791       if (Instruction *R = foldICmpWithCastAndCast(I))
   4792         return R;
   4793   }
   4794 
   4795   if (Instruction *Res = foldICmpBinOp(I))
   4796     return Res;
   4797 
   4798   if (Instruction *Res = foldICmpWithMinMax(I))
   4799     return Res;
   4800 
   4801   {
   4802     Value *A, *B;
   4803     // Transform (A & ~B) == 0 --> (A & B) != 0
   4804     // and       (A & ~B) != 0 --> (A & B) == 0
   4805     // if A is a power of 2.
   4806     if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
   4807         match(Op1, m_Zero()) &&
   4808         isKnownToBeAPowerOfTwo(A, false, 0, &I) && I.isEquality())
   4809       return new ICmpInst(I.getInversePredicate(), Builder.CreateAnd(A, B),
   4810                           Op1);
   4811 
   4812     // ~X < ~Y --> Y < X
   4813     // ~X < C -->  X > ~C
   4814     if (match(Op0, m_Not(m_Value(A)))) {
   4815       if (match(Op1, m_Not(m_Value(B))))
   4816         return new ICmpInst(I.getPredicate(), B, A);
   4817 
   4818       const APInt *C;
   4819       if (match(Op1, m_APInt(C)))
   4820         return new ICmpInst(I.getSwappedPredicate(), A,
   4821                             ConstantInt::get(Op1->getType(), ~(*C)));
   4822     }
   4823 
   4824     Instruction *AddI = nullptr;
   4825     if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B),
   4826                                      m_Instruction(AddI))) &&
   4827         isa<IntegerType>(A->getType())) {
   4828       Value *Result;
   4829       Constant *Overflow;
   4830       if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result,
   4831                                 Overflow)) {
   4832         replaceInstUsesWith(*AddI, Result);
   4833         return replaceInstUsesWith(I, Overflow);
   4834       }
   4835     }
   4836 
   4837     // (zext a) * (zext b)  --> llvm.umul.with.overflow.
   4838     if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
   4839       if (Instruction *R = processUMulZExtIdiom(I, Op0, Op1, *this))
   4840         return R;
   4841     }
   4842     if (match(Op1, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
   4843       if (Instruction *R = processUMulZExtIdiom(I, Op1, Op0, *this))
   4844         return R;
   4845     }
   4846   }
   4847 
   4848   if (Instruction *Res = foldICmpEquality(I))
   4849     return Res;
   4850 
   4851   // The 'cmpxchg' instruction returns an aggregate containing the old value and
   4852   // an i1 which indicates whether or not we successfully did the swap.
   4853   //
   4854   // Replace comparisons between the old value and the expected value with the
   4855   // indicator that 'cmpxchg' returns.
   4856   //
   4857   // N.B.  This transform is only valid when the 'cmpxchg' is not permitted to
   4858   // spuriously fail.  In those cases, the old value may equal the expected
   4859   // value but it is possible for the swap to not occur.
   4860   if (I.getPredicate() == ICmpInst::ICMP_EQ)
   4861     if (auto *EVI = dyn_cast<ExtractValueInst>(Op0))
   4862       if (auto *ACXI = dyn_cast<AtomicCmpXchgInst>(EVI->getAggregateOperand()))
   4863         if (EVI->getIndices()[0] == 0 && ACXI->getCompareOperand() == Op1 &&
   4864             !ACXI->isWeak())
   4865           return ExtractValueInst::Create(ACXI, 1);
   4866 
   4867   {
   4868     Value *X; ConstantInt *Cst;
   4869     // icmp X+Cst, X
   4870     if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
   4871       return foldICmpAddOpConst(X, Cst, I.getPredicate());
   4872 
   4873     // icmp X, X+Cst
   4874     if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
   4875       return foldICmpAddOpConst(X, Cst, I.getSwappedPredicate());
   4876   }
   4877 
   4878   return Changed ? &I : nullptr;
   4879 }
   4880 
   4881 /// Fold fcmp ([us]itofp x, cst) if possible.
   4882 Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
   4883                                                 Constant *RHSC) {
   4884   if (!isa<ConstantFP>(RHSC)) return nullptr;
   4885   const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
   4886 
   4887   // Get the width of the mantissa.  We don't want to hack on conversions that
   4888   // might lose information from the integer, e.g. "i64 -> float"
   4889   int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
   4890   if (MantissaWidth == -1) return nullptr;  // Unknown.
   4891 
   4892   IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
   4893 
   4894   bool LHSUnsigned = isa<UIToFPInst>(LHSI);
   4895 
   4896   if (I.isEquality()) {
   4897     FCmpInst::Predicate P = I.getPredicate();
   4898     bool IsExact = false;
   4899     APSInt RHSCvt(IntTy->getBitWidth(), LHSUnsigned);
   4900     RHS.convertToInteger(RHSCvt, APFloat::rmNearestTiesToEven, &IsExact);
   4901 
   4902     // If the floating point constant isn't an integer value, we know if we will
   4903     // ever compare equal / not equal to it.
   4904     if (!IsExact) {
   4905       // TODO: Can never be -0.0 and other non-representable values
   4906       APFloat RHSRoundInt(RHS);
   4907       RHSRoundInt.roundToIntegral(APFloat::rmNearestTiesToEven);
   4908       if (RHS.compare(RHSRoundInt) != APFloat::cmpEqual) {
   4909         if (P == FCmpInst::FCMP_OEQ || P == FCmpInst::FCMP_UEQ)
   4910           return replaceInstUsesWith(I, Builder.getFalse());
   4911 
   4912         assert(P == FCmpInst::FCMP_ONE || P == FCmpInst::FCMP_UNE);
   4913         return replaceInstUsesWith(I, Builder.getTrue());
   4914       }
   4915     }
   4916 
   4917     // TODO: If the constant is exactly representable, is it always OK to do
   4918     // equality compares as integer?
   4919   }
   4920 
   4921   // Check to see that the input is converted from an integer type that is small
   4922   // enough that preserves all bits.  TODO: check here for "known" sign bits.
   4923   // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
   4924   unsigned InputSize = IntTy->getScalarSizeInBits();
   4925 
   4926   // Following test does NOT adjust InputSize downwards for signed inputs,
   4927   // because the most negative value still requires all the mantissa bits
   4928   // to distinguish it from one less than that value.
   4929   if ((int)InputSize > MantissaWidth) {
   4930     // Conversion would lose accuracy. Check if loss can impact comparison.
   4931     int Exp = ilogb(RHS);
   4932     if (Exp == APFloat::IEK_Inf) {
   4933       int MaxExponent = ilogb(APFloat::getLargest(RHS.getSemantics()));
   4934       if (MaxExponent < (int)InputSize - !LHSUnsigned)
   4935         // Conversion could create infinity.
   4936         return nullptr;
   4937     } else {
   4938       // Note that if RHS is zero or NaN, then Exp is negative
   4939       // and first condition is trivially false.
   4940       if (MantissaWidth <= Exp && Exp <= (int)InputSize - !LHSUnsigned)
   4941         // Conversion could affect comparison.
   4942         return nullptr;
   4943     }
   4944   }
   4945 
   4946   // Otherwise, we can potentially simplify the comparison.  We know that it
   4947   // will always come through as an integer value and we know the constant is
   4948   // not a NAN (it would have been previously simplified).
   4949   assert(!RHS.isNaN() && "NaN comparison not already folded!");
   4950 
   4951   ICmpInst::Predicate Pred;
   4952   switch (I.getPredicate()) {
   4953   default: llvm_unreachable("Unexpected predicate!");
   4954   case FCmpInst::FCMP_UEQ:
   4955   case FCmpInst::FCMP_OEQ:
   4956     Pred = ICmpInst::ICMP_EQ;
   4957     break;
   4958   case FCmpInst::FCMP_UGT:
   4959   case FCmpInst::FCMP_OGT:
   4960     Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT;
   4961     break;
   4962   case FCmpInst::FCMP_UGE:
   4963   case FCmpInst::FCMP_OGE:
   4964     Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE;
   4965     break;
   4966   case FCmpInst::FCMP_ULT:
   4967   case FCmpInst::FCMP_OLT:
   4968     Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT;
   4969     break;
   4970   case FCmpInst::FCMP_ULE:
   4971   case FCmpInst::FCMP_OLE:
   4972     Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE;
   4973     break;
   4974   case FCmpInst::FCMP_UNE:
   4975   case FCmpInst::FCMP_ONE:
   4976     Pred = ICmpInst::ICMP_NE;
   4977     break;
   4978   case FCmpInst::FCMP_ORD:
   4979     return replaceInstUsesWith(I, Builder.getTrue());
   4980   case FCmpInst::FCMP_UNO:
   4981     return replaceInstUsesWith(I, Builder.getFalse());
   4982   }
   4983 
   4984   // Now we know that the APFloat is a normal number, zero or inf.
   4985 
   4986   // See if the FP constant is too large for the integer.  For example,
   4987   // comparing an i8 to 300.0.
   4988   unsigned IntWidth = IntTy->getScalarSizeInBits();
   4989 
   4990   if (!LHSUnsigned) {
   4991     // If the RHS value is > SignedMax, fold the comparison.  This handles +INF
   4992     // and large values.
   4993     APFloat SMax(RHS.getSemantics());
   4994     SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true,
   4995                           APFloat::rmNearestTiesToEven);
   4996     if (SMax.compare(RHS) == APFloat::cmpLessThan) {  // smax < 13123.0
   4997       if (Pred == ICmpInst::ICMP_NE  || Pred == ICmpInst::ICMP_SLT ||
   4998           Pred == ICmpInst::ICMP_SLE)
   4999         return replaceInstUsesWith(I, Builder.getTrue());
   5000       return replaceInstUsesWith(I, Builder.getFalse());
   5001     }
   5002   } else {
   5003     // If the RHS value is > UnsignedMax, fold the comparison. This handles
   5004     // +INF and large values.
   5005     APFloat UMax(RHS.getSemantics());
   5006     UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false,
   5007                           APFloat::rmNearestTiesToEven);
   5008     if (UMax.compare(RHS) == APFloat::cmpLessThan) {  // umax < 13123.0
   5009       if (Pred == ICmpInst::ICMP_NE  || Pred == ICmpInst::ICMP_ULT ||
   5010           Pred == ICmpInst::ICMP_ULE)
   5011         return replaceInstUsesWith(I, Builder.getTrue());
   5012       return replaceInstUsesWith(I, Builder.getFalse());
   5013     }
   5014   }
   5015 
   5016   if (!LHSUnsigned) {
   5017     // See if the RHS value is < SignedMin.
   5018     APFloat SMin(RHS.getSemantics());
   5019     SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true,
   5020                           APFloat::rmNearestTiesToEven);
   5021     if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
   5022       if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
   5023           Pred == ICmpInst::ICMP_SGE)
   5024         return replaceInstUsesWith(I, Builder.getTrue());
   5025       return replaceInstUsesWith(I, Builder.getFalse());
   5026     }
   5027   } else {
   5028     // See if the RHS value is < UnsignedMin.
   5029     APFloat SMin(RHS.getSemantics());
   5030     SMin.convertFromAPInt(APInt::getMinValue(IntWidth), true,
   5031                           APFloat::rmNearestTiesToEven);
   5032     if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0
   5033       if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT ||
   5034           Pred == ICmpInst::ICMP_UGE)
   5035         return replaceInstUsesWith(I, Builder.getTrue());
   5036       return replaceInstUsesWith(I, Builder.getFalse());
   5037     }
   5038   }
   5039 
   5040   // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or
   5041   // [0, UMAX], but it may still be fractional.  See if it is fractional by
   5042   // casting the FP value to the integer value and back, checking for equality.
   5043   // Don't do this for zero, because -0.0 is not fractional.
   5044   Constant *RHSInt = LHSUnsigned
   5045     ? ConstantExpr::getFPToUI(RHSC, IntTy)
   5046     : ConstantExpr::getFPToSI(RHSC, IntTy);
   5047   if (!RHS.isZero()) {
   5048     bool Equal = LHSUnsigned
   5049       ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC
   5050       : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC;
   5051     if (!Equal) {
   5052       // If we had a comparison against a fractional value, we have to adjust
   5053       // the compare predicate and sometimes the value.  RHSC is rounded towards
   5054       // zero at this point.
   5055       switch (Pred) {
   5056       default: llvm_unreachable("Unexpected integer comparison!");
   5057       case ICmpInst::ICMP_NE:  // (float)int != 4.4   --> true
   5058         return replaceInstUsesWith(I, Builder.getTrue());
   5059       case ICmpInst::ICMP_EQ:  // (float)int == 4.4   --> false
   5060         return replaceInstUsesWith(I, Builder.getFalse());
   5061       case ICmpInst::ICMP_ULE:
   5062         // (float)int <= 4.4   --> int <= 4
   5063         // (float)int <= -4.4  --> false
   5064         if (RHS.isNegative())
   5065           return replaceInstUsesWith(I, Builder.getFalse());
   5066         break;
   5067       case ICmpInst::ICMP_SLE:
   5068         // (float)int <= 4.4   --> int <= 4
   5069         // (float)int <= -4.4  --> int < -4
   5070         if (RHS.isNegative())
   5071           Pred = ICmpInst::ICMP_SLT;
   5072         break;
   5073       case ICmpInst::ICMP_ULT:
   5074         // (float)int < -4.4   --> false
   5075         // (float)int < 4.4    --> int <= 4
   5076         if (RHS.isNegative())
   5077           return replaceInstUsesWith(I, Builder.getFalse());
   5078         Pred = ICmpInst::ICMP_ULE;
   5079         break;
   5080       case ICmpInst::ICMP_SLT:
   5081         // (float)int < -4.4   --> int < -4
   5082         // (float)int < 4.4    --> int <= 4
   5083         if (!RHS.isNegative())
   5084           Pred = ICmpInst::ICMP_SLE;
   5085         break;
   5086       case ICmpInst::ICMP_UGT:
   5087         // (float)int > 4.4    --> int > 4
   5088         // (float)int > -4.4   --> true
   5089         if (RHS.isNegative())
   5090           return replaceInstUsesWith(I, Builder.getTrue());
   5091         break;
   5092       case ICmpInst::ICMP_SGT:
   5093         // (float)int > 4.4    --> int > 4
   5094         // (float)int > -4.4   --> int >= -4
   5095         if (RHS.isNegative())
   5096           Pred = ICmpInst::ICMP_SGE;
   5097         break;
   5098       case ICmpInst::ICMP_UGE:
   5099         // (float)int >= -4.4   --> true
   5100         // (float)int >= 4.4    --> int > 4
   5101         if (RHS.isNegative())
   5102           return replaceInstUsesWith(I, Builder.getTrue());
   5103         Pred = ICmpInst::ICMP_UGT;
   5104         break;
   5105       case ICmpInst::ICMP_SGE:
   5106         // (float)int >= -4.4   --> int >= -4
   5107         // (float)int >= 4.4    --> int > 4
   5108         if (!RHS.isNegative())
   5109           Pred = ICmpInst::ICMP_SGT;
   5110         break;
   5111       }
   5112     }
   5113   }
   5114 
   5115   // Lower this FP comparison into an appropriate integer version of the
   5116   // comparison.
   5117   return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
   5118 }
   5119 
   5120 Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
   5121   bool Changed = false;
   5122 
   5123   /// Orders the operands of the compare so that they are listed from most
   5124   /// complex to least complex.  This puts constants before unary operators,
   5125   /// before binary operators.
   5126   if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
   5127     I.swapOperands();
   5128     Changed = true;
   5129   }
   5130 
   5131   const CmpInst::Predicate Pred = I.getPredicate();
   5132   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   5133   if (Value *V = SimplifyFCmpInst(Pred, Op0, Op1, I.getFastMathFlags(),
   5134                                   SQ.getWithInstruction(&I)))
   5135     return replaceInstUsesWith(I, V);
   5136 
   5137   // Simplify 'fcmp pred X, X'
   5138   if (Op0 == Op1) {
   5139     switch (Pred) {
   5140       default: break;
   5141     case FCmpInst::FCMP_UNO:    // True if unordered: isnan(X) | isnan(Y)
   5142     case FCmpInst::FCMP_ULT:    // True if unordered or less than
   5143     case FCmpInst::FCMP_UGT:    // True if unordered or greater than
   5144     case FCmpInst::FCMP_UNE:    // True if unordered or not equal
   5145       // Canonicalize these to be 'fcmp uno %X, 0.0'.
   5146       I.setPredicate(FCmpInst::FCMP_UNO);
   5147       I.setOperand(1, Constant::getNullValue(Op0->getType()));
   5148       return &I;
   5149 
   5150     case FCmpInst::FCMP_ORD:    // True if ordered (no nans)
   5151     case FCmpInst::FCMP_OEQ:    // True if ordered and equal
   5152     case FCmpInst::FCMP_OGE:    // True if ordered and greater than or equal
   5153     case FCmpInst::FCMP_OLE:    // True if ordered and less than or equal
   5154       // Canonicalize these to be 'fcmp ord %X, 0.0'.
   5155       I.setPredicate(FCmpInst::FCMP_ORD);
   5156       I.setOperand(1, Constant::getNullValue(Op0->getType()));
   5157       return &I;
   5158     }
   5159   }
   5160 
   5161   // If we're just checking for a NaN (ORD/UNO) and have a non-NaN operand,
   5162   // then canonicalize the operand to 0.0.
   5163   if (Pred == CmpInst::FCMP_ORD || Pred == CmpInst::FCMP_UNO) {
   5164     if (!match(Op0, m_PosZeroFP()) && isKnownNeverNaN(Op0)) {
   5165       I.setOperand(0, ConstantFP::getNullValue(Op0->getType()));
   5166       return &I;
   5167     }
   5168     if (!match(Op1, m_PosZeroFP()) && isKnownNeverNaN(Op1)) {
   5169       I.setOperand(1, ConstantFP::getNullValue(Op0->getType()));
   5170       return &I;
   5171     }
   5172   }
   5173 
   5174   // Test if the FCmpInst instruction is used exclusively by a select as
   5175   // part of a minimum or maximum operation. If so, refrain from doing
   5176   // any other folding. This helps out other analyses which understand
   5177   // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
   5178   // and CodeGen. And in this case, at least one of the comparison
   5179   // operands has at least one user besides the compare (the select),
   5180   // which would often largely negate the benefit of folding anyway.
   5181   if (I.hasOneUse())
   5182     if (SelectInst *SI = dyn_cast<SelectInst>(I.user_back())) {
   5183       Value *A, *B;
   5184       SelectPatternResult SPR = matchSelectPattern(SI, A, B);
   5185       if (SPR.Flavor != SPF_UNKNOWN)
   5186         return nullptr;
   5187     }
   5188 
   5189   // Handle fcmp with constant RHS
   5190   if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
   5191     if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
   5192       switch (LHSI->getOpcode()) {
   5193       case Instruction::FPExt: {
   5194         // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless
   5195         FPExtInst *LHSExt = cast<FPExtInst>(LHSI);
   5196         ConstantFP *RHSF = dyn_cast<ConstantFP>(RHSC);
   5197         if (!RHSF)
   5198           break;
   5199 
   5200         const fltSemantics *Sem;
   5201         // FIXME: This shouldn't be here.
   5202         if (LHSExt->getSrcTy()->isHalfTy())
   5203           Sem = &APFloat::IEEEhalf();
   5204         else if (LHSExt->getSrcTy()->isFloatTy())
   5205           Sem = &APFloat::IEEEsingle();
   5206         else if (LHSExt->getSrcTy()->isDoubleTy())
   5207           Sem = &APFloat::IEEEdouble();
   5208         else if (LHSExt->getSrcTy()->isFP128Ty())
   5209           Sem = &APFloat::IEEEquad();
   5210         else if (LHSExt->getSrcTy()->isX86_FP80Ty())
   5211           Sem = &APFloat::x87DoubleExtended();
   5212         else if (LHSExt->getSrcTy()->isPPC_FP128Ty())
   5213           Sem = &APFloat::PPCDoubleDouble();
   5214         else
   5215           break;
   5216 
   5217         bool Lossy;
   5218         APFloat F = RHSF->getValueAPF();
   5219         F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy);
   5220 
   5221         // Avoid lossy conversions and denormals. Zero is a special case
   5222         // that's OK to convert.
   5223         APFloat Fabs = F;
   5224         Fabs.clearSign();
   5225         if (!Lossy &&
   5226             ((Fabs.compare(APFloat::getSmallestNormalized(*Sem)) !=
   5227                  APFloat::cmpLessThan) || Fabs.isZero()))
   5228 
   5229           return new FCmpInst(Pred, LHSExt->getOperand(0),
   5230                               ConstantFP::get(RHSC->getContext(), F));
   5231         break;
   5232       }
   5233       case Instruction::PHI:
   5234         // Only fold fcmp into the PHI if the phi and fcmp are in the same
   5235         // block.  If in the same block, we're encouraging jump threading.  If
   5236         // not, we are just pessimizing the code by making an i1 phi.
   5237         if (LHSI->getParent() == I.getParent())
   5238           if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
   5239             return NV;
   5240         break;
   5241       case Instruction::SIToFP:
   5242       case Instruction::UIToFP:
   5243         if (Instruction *NV = foldFCmpIntToFPConst(I, LHSI, RHSC))
   5244           return NV;
   5245         break;
   5246       case Instruction::FSub: {
   5247         // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C
   5248         Value *Op;
   5249         if (match(LHSI, m_FNeg(m_Value(Op))))
   5250           return new FCmpInst(I.getSwappedPredicate(), Op,
   5251                               ConstantExpr::getFNeg(RHSC));
   5252         break;
   5253       }
   5254       case Instruction::Load:
   5255         if (GetElementPtrInst *GEP =
   5256             dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
   5257           if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
   5258             if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
   5259                 !cast<LoadInst>(LHSI)->isVolatile())
   5260               if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
   5261                 return Res;
   5262         }
   5263         break;
   5264       case Instruction::Call: {
   5265         if (!RHSC->isNullValue())
   5266           break;
   5267 
   5268         CallInst *CI = cast<CallInst>(LHSI);
   5269         Intrinsic::ID IID = getIntrinsicForCallSite(CI, &TLI);
   5270         if (IID != Intrinsic::fabs)
   5271           break;
   5272 
   5273         // Various optimization for fabs compared with zero.
   5274         switch (Pred) {
   5275         default:
   5276           break;
   5277         // fabs(x) < 0 --> false
   5278         case FCmpInst::FCMP_OLT:
   5279           llvm_unreachable("handled by SimplifyFCmpInst");
   5280         // fabs(x) > 0 --> x != 0
   5281         case FCmpInst::FCMP_OGT:
   5282           return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0), RHSC);
   5283         // fabs(x) <= 0 --> x == 0
   5284         case FCmpInst::FCMP_OLE:
   5285           return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0), RHSC);
   5286         // fabs(x) >= 0 --> !isnan(x)
   5287         case FCmpInst::FCMP_OGE:
   5288           return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0), RHSC);
   5289         // fabs(x) == 0 --> x == 0
   5290         // fabs(x) != 0 --> x != 0
   5291         case FCmpInst::FCMP_OEQ:
   5292         case FCmpInst::FCMP_UEQ:
   5293         case FCmpInst::FCMP_ONE:
   5294         case FCmpInst::FCMP_UNE:
   5295           return new FCmpInst(Pred, CI->getArgOperand(0), RHSC);
   5296         }
   5297       }
   5298       }
   5299   }
   5300 
   5301   // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y
   5302   Value *X, *Y;
   5303   if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
   5304     return new FCmpInst(I.getSwappedPredicate(), X, Y);
   5305 
   5306   // fcmp (fpext x), (fpext y) -> fcmp x, y
   5307   if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0))
   5308     if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1))
   5309       if (LHSExt->getSrcTy() == RHSExt->getSrcTy())
   5310         return new FCmpInst(Pred, LHSExt->getOperand(0), RHSExt->getOperand(0));
   5311 
   5312   return Changed ? &I : nullptr;
   5313 }
   5314