      1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
     11 // both before and after the DAG is legalized.
     12 //
     13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
     14 // primarily intended to handle simplification opportunities that are implicit
     15 // in the LLVM IR and exposed by the various codegen lowering phases.
     16 //
     17 //===----------------------------------------------------------------------===//
     18 
     19 #include "llvm/CodeGen/SelectionDAG.h"
     20 #include "llvm/ADT/SmallPtrSet.h"
     21 #include "llvm/ADT/Statistic.h"
     22 #include "llvm/Analysis/AliasAnalysis.h"
     23 #include "llvm/CodeGen/MachineFrameInfo.h"
     24 #include "llvm/CodeGen/MachineFunction.h"
     25 #include "llvm/IR/DataLayout.h"
     26 #include "llvm/IR/DerivedTypes.h"
     27 #include "llvm/IR/Function.h"
     28 #include "llvm/IR/LLVMContext.h"
     29 #include "llvm/Support/CommandLine.h"
     30 #include "llvm/Support/Debug.h"
     31 #include "llvm/Support/ErrorHandling.h"
     32 #include "llvm/Support/MathExtras.h"
     33 #include "llvm/Support/raw_ostream.h"
     34 #include "llvm/Target/TargetLowering.h"
     35 #include "llvm/Target/TargetMachine.h"
     36 #include "llvm/Target/TargetOptions.h"
     37 #include "llvm/Target/TargetRegisterInfo.h"
     38 #include "llvm/Target/TargetSubtargetInfo.h"
     39 #include <algorithm>
     40 using namespace llvm;
     41 
     42 #define DEBUG_TYPE "dagcombine"
     43 
     44 STATISTIC(NodesCombined   , "Number of dag nodes combined");
     45 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
     46 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
     47 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
     48 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
      49 STATISTIC(SlicedLoads     , "Number of loads sliced");
     50 
     51 namespace {
     52   static cl::opt<bool>
     53     CombinerAA("combiner-alias-analysis", cl::Hidden,
     54                cl::desc("Enable DAG combiner alias-analysis heuristics"));
     55 
     56   static cl::opt<bool>
     57     CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
     58                cl::desc("Enable DAG combiner's use of IR alias analysis"));
     59 
     60   static cl::opt<bool>
     61     UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
     62                cl::desc("Enable DAG combiner's use of TBAA"));
     63 
     64 #ifndef NDEBUG
     65   static cl::opt<std::string>
     66     CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
     67                cl::desc("Only use DAG-combiner alias analysis in this"
     68                         " function"));
     69 #endif
     70 
     71   /// Hidden option to stress test load slicing, i.e., when this option
     72   /// is enabled, load slicing bypasses most of its profitability guards.
     73   static cl::opt<bool>
     74   StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
     75                     cl::desc("Bypass the profitability model of load "
     76                              "slicing"),
     77                     cl::init(false));
     78 
     79 //------------------------------ DAGCombiner ---------------------------------//
     80 
     81   class DAGCombiner {
     82     SelectionDAG &DAG;
     83     const TargetLowering &TLI;
     84     CombineLevel Level;
     85     CodeGenOpt::Level OptLevel;
     86     bool LegalOperations;
     87     bool LegalTypes;
     88     bool ForCodeSize;
     89 
     90     // Worklist of all of the nodes that need to be simplified.
     91     //
     92     // This has the semantics that when adding to the worklist,
     93     // the item added must be next to be processed. It should
     94     // also only appear once. The naive approach to this takes
     95     // linear time.
     96     //
     97     // To reduce the insert/remove time to logarithmic, we use
     98     // a set and a vector to maintain our worklist.
     99     //
    100     // The set contains the items on the worklist, but does not
    101     // maintain the order they should be visited.
    102     //
    103     // The vector maintains the order nodes should be visited, but may
    104     // contain duplicate or removed nodes. When choosing a node to
    105     // visit, we pop off the order stack until we find an item that is
    106     // also in the contents set. All operations are O(log N).
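             // For example, a node that is added twice appears twice in the
             // order vector but only once in the contents set; the stale vector
             // entry is simply skipped when popped (see the pop loop in Run()).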
    107     SmallPtrSet<SDNode*, 64> WorkListContents;
    108     SmallVector<SDNode*, 64> WorkListOrder;
    109 
    110     // AA - Used for DAG load/store alias analysis.
    111     AliasAnalysis &AA;
    112 
     113     /// AddUsersToWorkList - When an instruction is simplified, add all users of
     114     /// the instruction to the worklist, because they might now be simplified
     115     /// further.
    116     ///
    117     void AddUsersToWorkList(SDNode *N) {
    118       for (SDNode *Node : N->uses())
    119         AddToWorkList(Node);
    120     }
    121 
    122     /// visit - call the node-specific routine that knows how to fold each
    123     /// particular type of node.
    124     SDValue visit(SDNode *N);
    125 
    126   public:
     127     /// AddToWorkList - Add N to the worklist, making sure its instance is at
     128     /// the back (next to be processed).
    129     void AddToWorkList(SDNode *N) {
    130       WorkListContents.insert(N);
    131       WorkListOrder.push_back(N);
    132     }
    133 
    134     /// removeFromWorkList - remove all instances of N from the worklist.
    135     ///
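             /// Note that N may remain in WorkListOrder; such stale entries are
             /// skipped when the worklist is processed.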
    136     void removeFromWorkList(SDNode *N) {
    137       WorkListContents.erase(N);
    138     }
    139 
    140     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
    141                       bool AddTo = true);
    142 
    143     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
    144       return CombineTo(N, &Res, 1, AddTo);
    145     }
    146 
    147     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
    148                       bool AddTo = true) {
    149       SDValue To[] = { Res0, Res1 };
    150       return CombineTo(N, To, 2, AddTo);
    151     }
    152 
    153     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
    154 
    155   private:
    156 
    157     /// SimplifyDemandedBits - Check the specified integer node value to see if
    158     /// it can be simplified or if things it uses can be simplified by bit
    159     /// propagation.  If so, return true.
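             /// This convenience overload demands all bits of Op.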
    160     bool SimplifyDemandedBits(SDValue Op) {
    161       unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
    162       APInt Demanded = APInt::getAllOnesValue(BitWidth);
    163       return SimplifyDemandedBits(Op, Demanded);
    164     }
    165 
    166     bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
    167 
    168     bool CombineToPreIndexedLoadStore(SDNode *N);
    169     bool CombineToPostIndexedLoadStore(SDNode *N);
    170     bool SliceUpLoad(SDNode *N);
    171 
    172     /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    173     ///   load.
    174     ///
    175     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    176     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    177     /// \param EltNo index of the vector element to load.
    178     /// \param OriginalLoad load that EVE came from to be replaced.
     179     /// \returns EVE on success, SDValue() on failure.
    180     SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    181         SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
    182     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    183     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    184     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    185     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    186     SDValue PromoteIntBinOp(SDValue Op);
    187     SDValue PromoteIntShiftOp(SDValue Op);
    188     SDValue PromoteExtend(SDValue Op);
    189     bool PromoteLoad(SDValue Op);
    190 
    191     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
    192                          SDValue Trunc, SDValue ExtLoad, SDLoc DL,
    193                          ISD::NodeType ExtType);
    194 
    195     /// combine - call the node-specific routine that knows how to fold each
    196     /// particular type of node. If that doesn't do anything, try the
    197     /// target-specific DAG combines.
    198     SDValue combine(SDNode *N);
    199 
    200     // Visitation implementation - Implement dag node combining for different
    201     // node types.  The semantics are as follows:
    202     // Return Value:
    203     //   SDValue.getNode() == 0 - No change was made
    204     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    205     //   otherwise              - N should be replaced by the returned Operand.
    206     //
    207     SDValue visitTokenFactor(SDNode *N);
    208     SDValue visitMERGE_VALUES(SDNode *N);
    209     SDValue visitADD(SDNode *N);
    210     SDValue visitSUB(SDNode *N);
    211     SDValue visitADDC(SDNode *N);
    212     SDValue visitSUBC(SDNode *N);
    213     SDValue visitADDE(SDNode *N);
    214     SDValue visitSUBE(SDNode *N);
    215     SDValue visitMUL(SDNode *N);
    216     SDValue visitSDIV(SDNode *N);
    217     SDValue visitUDIV(SDNode *N);
    218     SDValue visitSREM(SDNode *N);
    219     SDValue visitUREM(SDNode *N);
    220     SDValue visitMULHU(SDNode *N);
    221     SDValue visitMULHS(SDNode *N);
    222     SDValue visitSMUL_LOHI(SDNode *N);
    223     SDValue visitUMUL_LOHI(SDNode *N);
    224     SDValue visitSMULO(SDNode *N);
    225     SDValue visitUMULO(SDNode *N);
    226     SDValue visitSDIVREM(SDNode *N);
    227     SDValue visitUDIVREM(SDNode *N);
    228     SDValue visitAND(SDNode *N);
    229     SDValue visitOR(SDNode *N);
    230     SDValue visitXOR(SDNode *N);
    231     SDValue SimplifyVBinOp(SDNode *N);
    232     SDValue SimplifyVUnaryOp(SDNode *N);
    233     SDValue visitSHL(SDNode *N);
    234     SDValue visitSRA(SDNode *N);
    235     SDValue visitSRL(SDNode *N);
    236     SDValue visitRotate(SDNode *N);
    237     SDValue visitCTLZ(SDNode *N);
    238     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    239     SDValue visitCTTZ(SDNode *N);
    240     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    241     SDValue visitCTPOP(SDNode *N);
    242     SDValue visitSELECT(SDNode *N);
    243     SDValue visitVSELECT(SDNode *N);
    244     SDValue visitSELECT_CC(SDNode *N);
    245     SDValue visitSETCC(SDNode *N);
    246     SDValue visitSIGN_EXTEND(SDNode *N);
    247     SDValue visitZERO_EXTEND(SDNode *N);
    248     SDValue visitANY_EXTEND(SDNode *N);
    249     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    250     SDValue visitTRUNCATE(SDNode *N);
    251     SDValue visitBITCAST(SDNode *N);
    252     SDValue visitBUILD_PAIR(SDNode *N);
    253     SDValue visitFADD(SDNode *N);
    254     SDValue visitFSUB(SDNode *N);
    255     SDValue visitFMUL(SDNode *N);
    256     SDValue visitFMA(SDNode *N);
    257     SDValue visitFDIV(SDNode *N);
    258     SDValue visitFREM(SDNode *N);
    259     SDValue visitFCOPYSIGN(SDNode *N);
    260     SDValue visitSINT_TO_FP(SDNode *N);
    261     SDValue visitUINT_TO_FP(SDNode *N);
    262     SDValue visitFP_TO_SINT(SDNode *N);
    263     SDValue visitFP_TO_UINT(SDNode *N);
    264     SDValue visitFP_ROUND(SDNode *N);
    265     SDValue visitFP_ROUND_INREG(SDNode *N);
    266     SDValue visitFP_EXTEND(SDNode *N);
    267     SDValue visitFNEG(SDNode *N);
    268     SDValue visitFABS(SDNode *N);
    269     SDValue visitFCEIL(SDNode *N);
    270     SDValue visitFTRUNC(SDNode *N);
    271     SDValue visitFFLOOR(SDNode *N);
    272     SDValue visitBRCOND(SDNode *N);
    273     SDValue visitBR_CC(SDNode *N);
    274     SDValue visitLOAD(SDNode *N);
    275     SDValue visitSTORE(SDNode *N);
    276     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    277     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    278     SDValue visitBUILD_VECTOR(SDNode *N);
    279     SDValue visitCONCAT_VECTORS(SDNode *N);
    280     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    281     SDValue visitVECTOR_SHUFFLE(SDNode *N);
    282     SDValue visitINSERT_SUBVECTOR(SDNode *N);
    283 
    284     SDValue XformToShuffleWithZero(SDNode *N);
    285     SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
    286 
    287     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
    288 
    289     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    290     SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
    291     SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2);
    292     SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2,
    293                              SDValue N3, ISD::CondCode CC,
    294                              bool NotExtCompare = false);
    295     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
    296                           SDLoc DL, bool foldBooleans = true);
    297 
    298     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
    299                            SDValue &CC) const;
    300     bool isOneUseSetCC(SDValue N) const;
    301 
    302     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
    303                                          unsigned HiOp);
    304     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    305     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    306     SDValue BuildSDIV(SDNode *N);
    307     SDValue BuildUDIV(SDNode *N);
    308     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
    309                                bool DemandHighBits = true);
    310     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    311     SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
    312                               SDValue InnerPos, SDValue InnerNeg,
    313                               unsigned PosOpcode, unsigned NegOpcode,
    314                               SDLoc DL);
    315     SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL);
    316     SDValue ReduceLoadWidth(SDNode *N);
    317     SDValue ReduceLoadOpStoreWidth(SDNode *N);
    318     SDValue TransformFPLoadStorePair(SDNode *N);
    319     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    320     SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
    321 
    322     SDValue GetDemandedBits(SDValue V, const APInt &Mask);
    323 
    324     /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
    325     /// looking for aliasing nodes and adding them to the Aliases vector.
    326     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
    327                           SmallVectorImpl<SDValue> &Aliases);
    328 
    329     /// isAlias - Return true if there is any possibility that the two addresses
    330     /// overlap.
    331     bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
    332 
    333     /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
     334     /// looking for a better chain (aliasing node).
    335     SDValue FindBetterChain(SDNode *N, SDValue Chain);
    336 
    337     /// Merge consecutive store operations into a wide store.
    338     /// This optimization uses wide integers or vectors when possible.
    339     /// \return True if some memory operations were changed.
    340     bool MergeConsecutiveStores(StoreSDNode *N);
    341 
    342     /// \brief Try to transform a truncation where C is a constant:
    343     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    344     ///
    345     /// \p N needs to be a truncation and its first operand an AND. Other
     346     /// requirements are checked by the function (e.g. that the truncate is
     347     /// single-use); if they are not met, an empty SDValue is returned.
    348     SDValue distributeTruncateThroughAnd(SDNode *N);
    349 
    350   public:
    351     DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
    352         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
    353           OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
    354       AttributeSet FnAttrs =
    355           DAG.getMachineFunction().getFunction()->getAttributes();
    356       ForCodeSize =
    357           FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
    358                                Attribute::OptimizeForSize) ||
    359           FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
    360     }
    361 
     362     /// Run - runs the DAG combiner on all nodes in the worklist.
    363     void Run(CombineLevel AtLevel);
    364 
    365     SelectionDAG &getDAG() const { return DAG; }
    366 
    367     /// getShiftAmountTy - Returns a type large enough to hold any valid
    368     /// shift amount - before type legalization these can be huge.
    369     EVT getShiftAmountTy(EVT LHSTy) {
    370       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
    371       if (LHSTy.isVector())
    372         return LHSTy;
    373       return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy)
    374                         : TLI.getPointerTy();
    375     }
    376 
    377     /// isTypeLegal - This method returns true if we are running before type
    378     /// legalization or if the specified VT is legal.
    379     bool isTypeLegal(const EVT &VT) {
    380       if (!LegalTypes) return true;
    381       return TLI.isTypeLegal(VT);
    382     }
    383 
    384     /// getSetCCResultType - Convenience wrapper around
    385     /// TargetLowering::getSetCCResultType
    386     EVT getSetCCResultType(EVT VT) const {
    387       return TLI.getSetCCResultType(*DAG.getContext(), VT);
    388     }
    389   };
    390 }
    391 
    392 
    393 namespace {
    394 /// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
    395 /// nodes from the worklist.
    396 class WorkListRemover : public SelectionDAG::DAGUpdateListener {
    397   DAGCombiner &DC;
    398 public:
    399   explicit WorkListRemover(DAGCombiner &dc)
    400     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
    401 
    402   void NodeDeleted(SDNode *N, SDNode *E) override {
    403     DC.removeFromWorkList(N);
    404   }
    405 };
    406 }
    407 
    408 //===----------------------------------------------------------------------===//
    409 //  TargetLowering::DAGCombinerInfo implementation
    410 //===----------------------------------------------------------------------===//
    411 
    412 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
    413   ((DAGCombiner*)DC)->AddToWorkList(N);
    414 }
    415 
    416 void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
    417   ((DAGCombiner*)DC)->removeFromWorkList(N);
    418 }
    419 
    420 SDValue TargetLowering::DAGCombinerInfo::
    421 CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
    422   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
    423 }
    424 
    425 SDValue TargetLowering::DAGCombinerInfo::
    426 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
    427   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
    428 }
    429 
    430 
    431 SDValue TargetLowering::DAGCombinerInfo::
    432 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
    433   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
    434 }
    435 
    436 void TargetLowering::DAGCombinerInfo::
    437 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
    438   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
    439 }
    440 
    441 //===----------------------------------------------------------------------===//
    442 // Helper Functions
    443 //===----------------------------------------------------------------------===//
    444 
     445 /// isNegatibleForFree - Return 0 if the expression cannot be negated cheaply,
     446 /// 1 if we can compute the negated form for the same cost as the expression
     447 /// itself, or 2 if we can compute the negated form more cheaply.
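         /// For example, under unsafe FP math (fadd X, (fneg Y)) is negatible for
         /// free: the inner fneg is removable, so -(X + (-Y)) can later be rebuilt
         /// as (fsub Y, X) by GetNegatedExpression.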
    448 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
    449                                const TargetLowering &TLI,
    450                                const TargetOptions *Options,
    451                                unsigned Depth = 0) {
    452   // fneg is removable even if it has multiple uses.
    453   if (Op.getOpcode() == ISD::FNEG) return 2;
    454 
    455   // Don't allow anything with multiple uses.
    456   if (!Op.hasOneUse()) return 0;
    457 
    458   // Don't recurse exponentially.
    459   if (Depth > 6) return 0;
    460 
    461   switch (Op.getOpcode()) {
     462   default: return 0;
    463   case ISD::ConstantFP:
    464     // Don't invert constant FP values after legalize.  The negated constant
    465     // isn't necessarily legal.
    466     return LegalOperations ? 0 : 1;
    467   case ISD::FADD:
    468     // FIXME: determine better conditions for this xform.
    469     if (!Options->UnsafeFPMath) return 0;
    470 
    471     // After operation legalization, it might not be legal to create new FSUBs.
    472     if (LegalOperations &&
    473         !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
    474       return 0;
    475 
    476     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    477     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
    478                                     Options, Depth + 1))
    479       return V;
    480     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    481     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
    482                               Depth + 1);
    483   case ISD::FSUB:
    484     // We can't turn -(A-B) into B-A when we honor signed zeros.
    485     if (!Options->UnsafeFPMath) return 0;
    486 
    487     // fold (fneg (fsub A, B)) -> (fsub B, A)
    488     return 1;
    489 
    490   case ISD::FMUL:
    491   case ISD::FDIV:
    492     if (Options->HonorSignDependentRoundingFPMath()) return 0;
    493 
    494     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
    495     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
    496                                     Options, Depth + 1))
    497       return V;
    498 
    499     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
    500                               Depth + 1);
    501 
    502   case ISD::FP_EXTEND:
    503   case ISD::FP_ROUND:
    504   case ISD::FSIN:
    505     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
    506                               Depth + 1);
    507   }
    508 }
    509 
     510 /// GetNegatedExpression - If isNegatibleForFree returned a non-zero value, this
     511 /// function returns the newly negated expression.
    512 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
    513                                     bool LegalOperations, unsigned Depth = 0) {
    514   // fneg is removable even if it has multiple uses.
    515   if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
    516 
    517   // Don't allow anything with multiple uses.
    518   assert(Op.hasOneUse() && "Unknown reuse!");
    519 
    520   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
    521   switch (Op.getOpcode()) {
    522   default: llvm_unreachable("Unknown code");
    523   case ISD::ConstantFP: {
    524     APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    525     V.changeSign();
    526     return DAG.getConstantFP(V, Op.getValueType());
    527   }
    528   case ISD::FADD:
    529     // FIXME: determine better conditions for this xform.
    530     assert(DAG.getTarget().Options.UnsafeFPMath);
    531 
    532     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    533     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
    534                            DAG.getTargetLoweringInfo(),
    535                            &DAG.getTarget().Options, Depth+1))
    536       return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
    537                          GetNegatedExpression(Op.getOperand(0), DAG,
    538                                               LegalOperations, Depth+1),
    539                          Op.getOperand(1));
    540     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    541     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
    542                        GetNegatedExpression(Op.getOperand(1), DAG,
    543                                             LegalOperations, Depth+1),
    544                        Op.getOperand(0));
    545   case ISD::FSUB:
    546     // We can't turn -(A-B) into B-A when we honor signed zeros.
    547     assert(DAG.getTarget().Options.UnsafeFPMath);
    548 
    549     // fold (fneg (fsub 0, B)) -> B
    550     if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
    551       if (N0CFP->getValueAPF().isZero())
    552         return Op.getOperand(1);
    553 
    554     // fold (fneg (fsub A, B)) -> (fsub B, A)
    555     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
    556                        Op.getOperand(1), Op.getOperand(0));
    557 
    558   case ISD::FMUL:
    559   case ISD::FDIV:
    560     assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath());
    561 
    562     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    563     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
    564                            DAG.getTargetLoweringInfo(),
    565                            &DAG.getTarget().Options, Depth+1))
    566       return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
    567                          GetNegatedExpression(Op.getOperand(0), DAG,
    568                                               LegalOperations, Depth+1),
    569                          Op.getOperand(1));
    570 
    571     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    572     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
    573                        Op.getOperand(0),
    574                        GetNegatedExpression(Op.getOperand(1), DAG,
    575                                             LegalOperations, Depth+1));
    576 
    577   case ISD::FP_EXTEND:
    578   case ISD::FSIN:
    579     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
    580                        GetNegatedExpression(Op.getOperand(0), DAG,
    581                                             LegalOperations, Depth+1));
    582   case ISD::FP_ROUND:
    583       return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
    584                          GetNegatedExpression(Op.getOperand(0), DAG,
    585                                               LegalOperations, Depth+1),
    586                          Op.getOperand(1));
    587   }
    588 }
    589 
    590 // isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
    591 // that selects between the target values used for true and false, making it
    592 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
    593 // the appropriate nodes based on the type of node we are checking. This
    594 // simplifies life a bit for the callers.
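         // For example, on a target whose booleans are 0 and 1, the node
         // (select_cc lhs, rhs, 1, 0, cc) is treated as (setcc lhs, rhs, cc), and
         // LHS, RHS and CC are set to lhs, rhs and cc respectively.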
    595 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
    596                                     SDValue &CC) const {
    597   if (N.getOpcode() == ISD::SETCC) {
    598     LHS = N.getOperand(0);
    599     RHS = N.getOperand(1);
    600     CC  = N.getOperand(2);
    601     return true;
    602   }
    603 
    604   if (N.getOpcode() != ISD::SELECT_CC ||
    605       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
    606       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
    607     return false;
    608 
    609   LHS = N.getOperand(0);
    610   RHS = N.getOperand(1);
    611   CC  = N.getOperand(4);
    612   return true;
    613 }
    614 
    615 // isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
    616 // one use.  If this is true, it allows the users to invert the operation for
    617 // free when it is profitable to do so.
    618 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
    619   SDValue N0, N1, N2;
    620   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
    621     return true;
    622   return false;
    623 }
    624 
    625 /// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose
    626 /// elements are all the same constant or undefined.
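         /// For example, a v4i32 BUILD_VECTOR whose defined operands are all the
         /// constant 42 (with any remaining elements undef) is a constant splat,
         /// and SplatValue receives that constant.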
    627 static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
    628   BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
    629   if (!C)
    630     return false;
    631 
    632   APInt SplatUndef;
    633   unsigned SplatBitSize;
    634   bool HasAnyUndefs;
    635   EVT EltVT = N->getValueType(0).getVectorElementType();
    636   return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
    637                              HasAnyUndefs) &&
    638           EltVT.getSizeInBits() >= SplatBitSize);
    639 }
    640 
     641 /// \brief Returns the SDNode if it is a constant BuildVector or constant.
    642 static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) {
    643   if (isa<ConstantSDNode>(N))
    644     return N.getNode();
    645   BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
     646   if (BV && BV->isConstant())
    647     return BV;
    648   return nullptr;
    649 }
    650 
     651 /// \brief Returns the SDNode if it is a constant splat BuildVector or constant
     652 /// int.
    653 static ConstantSDNode *isConstOrConstSplat(SDValue N) {
    654   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
    655     return CN;
    656 
    657   if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
    658     BitVector UndefElements;
    659     ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
    660 
    661     // BuildVectors can truncate their operands. Ignore that case here.
     662     // FIXME: We blindly ignore splats which include undef, which is overly
     663     // pessimistic.
    664     if (CN && UndefElements.none() &&
    665         CN->getValueType(0) == N.getValueType().getScalarType())
    666       return CN;
    667   }
    668 
    669   return nullptr;
    670 }
    671 
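         /// ReassociateOps - Reassociate commutative operations to move constants
         /// together, e.g. (op (op x, c1), c2) -> (op x, (op c1, c2)), or
         /// (op (op x, c1), y) -> (op (op x, y), c1) when (op x, c1) has one use.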
    672 SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
    673                                     SDValue N0, SDValue N1) {
    674   EVT VT = N0.getValueType();
    675   if (N0.getOpcode() == Opc) {
    676     if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) {
    677       if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) {
    678         // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
    679         SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R);
    680         if (!OpNode.getNode())
    681           return SDValue();
    682         return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
    683       }
    684       if (N0.hasOneUse()) {
    685         // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
    686         // use
    687         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
    688         if (!OpNode.getNode())
    689           return SDValue();
    690         AddToWorkList(OpNode.getNode());
    691         return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
    692       }
    693     }
    694   }
    695 
    696   if (N1.getOpcode() == Opc) {
    697     if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) {
    698       if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) {
    699         // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
    700         SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L);
    701         if (!OpNode.getNode())
    702           return SDValue();
    703         return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
    704       }
    705       if (N1.hasOneUse()) {
    706         // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one
    707         // use
    708         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0);
    709         if (!OpNode.getNode())
    710           return SDValue();
    711         AddToWorkList(OpNode.getNode());
    712         return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
    713       }
    714     }
    715   }
    716 
    717   return SDValue();
    718 }
    719 
    720 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
    721                                bool AddTo) {
    722   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
    723   ++NodesCombined;
    724   DEBUG(dbgs() << "\nReplacing.1 ";
    725         N->dump(&DAG);
    726         dbgs() << "\nWith: ";
    727         To[0].getNode()->dump(&DAG);
    728         dbgs() << " and " << NumTo-1 << " other values\n";
    729         for (unsigned i = 0, e = NumTo; i != e; ++i)
    730           assert((!To[i].getNode() ||
    731                   N->getValueType(i) == To[i].getValueType()) &&
    732                  "Cannot combine value to value of different type!"));
    733   WorkListRemover DeadNodes(*this);
    734   DAG.ReplaceAllUsesWith(N, To);
    735   if (AddTo) {
    736     // Push the new nodes and any users onto the worklist
    737     for (unsigned i = 0, e = NumTo; i != e; ++i) {
    738       if (To[i].getNode()) {
    739         AddToWorkList(To[i].getNode());
    740         AddUsersToWorkList(To[i].getNode());
    741       }
    742     }
    743   }
    744 
    745   // Finally, if the node is now dead, remove it from the graph.  The node
    746   // may not be dead if the replacement process recursively simplified to
    747   // something else needing this node.
    748   if (N->use_empty()) {
    749     // Nodes can be reintroduced into the worklist.  Make sure we do not
    750     // process a node that has been replaced.
    751     removeFromWorkList(N);
    752 
    753     // Finally, since the node is now dead, remove it from the graph.
    754     DAG.DeleteNode(N);
    755   }
    756   return SDValue(N, 0);
    757 }
    758 
    759 void DAGCombiner::
    760 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
    761   // Replace all uses.  If any nodes become isomorphic to other nodes and
    762   // are deleted, make sure to remove them from our worklist.
    763   WorkListRemover DeadNodes(*this);
    764   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
    765 
    766   // Push the new node and any (possibly new) users onto the worklist.
    767   AddToWorkList(TLO.New.getNode());
    768   AddUsersToWorkList(TLO.New.getNode());
    769 
    770   // Finally, if the node is now dead, remove it from the graph.  The node
    771   // may not be dead if the replacement process recursively simplified to
    772   // something else needing this node.
    773   if (TLO.Old.getNode()->use_empty()) {
    774     removeFromWorkList(TLO.Old.getNode());
    775 
    776     // If the operands of this node are only used by the node, they will now
    777     // be dead.  Make sure to visit them first to delete dead nodes early.
    778     for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
    779       if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
    780         AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
    781 
    782     DAG.DeleteNode(TLO.Old.getNode());
    783   }
    784 }
    785 
    786 /// SimplifyDemandedBits - Check the specified integer node value to see if
    787 /// it can be simplified or if things it uses can be simplified by bit
    788 /// propagation.  If so, return true.
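         /// For example, if Op is (and X, 0xFF) and the upper bits of X are already
         /// known to be zero, the 'and' is redundant and Op is replaced by X.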
    789 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
    790   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
    791   APInt KnownZero, KnownOne;
    792   if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
    793     return false;
    794 
    795   // Revisit the node.
    796   AddToWorkList(Op.getNode());
    797 
    798   // Replace the old value with the new one.
    799   ++NodesCombined;
    800   DEBUG(dbgs() << "\nReplacing.2 ";
    801         TLO.Old.getNode()->dump(&DAG);
    802         dbgs() << "\nWith: ";
    803         TLO.New.getNode()->dump(&DAG);
    804         dbgs() << '\n');
    805 
    806   CommitTargetLoweringOpt(TLO);
    807   return true;
    808 }
    809 
    810 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
    811   SDLoc dl(Load);
    812   EVT VT = Load->getValueType(0);
    813   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));
    814 
    815   DEBUG(dbgs() << "\nReplacing.9 ";
    816         Load->dump(&DAG);
    817         dbgs() << "\nWith: ";
    818         Trunc.getNode()->dump(&DAG);
    819         dbgs() << '\n');
    820   WorkListRemover DeadNodes(*this);
    821   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
    822   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
    823   removeFromWorkList(Load);
    824   DAG.DeleteNode(Load);
    825   AddToWorkList(Trunc.getNode());
    826 }
    827 
    828 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
    829   Replace = false;
    830   SDLoc dl(Op);
    831   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
    832     EVT MemVT = LD->getMemoryVT();
    833     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
    834       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
    835                                                   : ISD::EXTLOAD)
    836       : LD->getExtensionType();
    837     Replace = true;
    838     return DAG.getExtLoad(ExtType, dl, PVT,
    839                           LD->getChain(), LD->getBasePtr(),
    840                           MemVT, LD->getMemOperand());
    841   }
    842 
    843   unsigned Opc = Op.getOpcode();
    844   switch (Opc) {
    845   default: break;
    846   case ISD::AssertSext:
    847     return DAG.getNode(ISD::AssertSext, dl, PVT,
    848                        SExtPromoteOperand(Op.getOperand(0), PVT),
    849                        Op.getOperand(1));
    850   case ISD::AssertZext:
    851     return DAG.getNode(ISD::AssertZext, dl, PVT,
    852                        ZExtPromoteOperand(Op.getOperand(0), PVT),
    853                        Op.getOperand(1));
    854   case ISD::Constant: {
    855     unsigned ExtOpc =
    856       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    857     return DAG.getNode(ExtOpc, dl, PVT, Op);
    858   }
    859   }
    860 
    861   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    862     return SDValue();
    863   return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
    864 }
    865 
    866 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
    867   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    868     return SDValue();
    869   EVT OldVT = Op.getValueType();
    870   SDLoc dl(Op);
    871   bool Replace = false;
    872   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
    873   if (!NewOp.getNode())
    874     return SDValue();
    875   AddToWorkList(NewOp.getNode());
    876 
    877   if (Replace)
    878     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
    879   return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
    880                      DAG.getValueType(OldVT));
    881 }
    882 
    883 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
    884   EVT OldVT = Op.getValueType();
    885   SDLoc dl(Op);
    886   bool Replace = false;
    887   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
    888   if (!NewOp.getNode())
    889     return SDValue();
    890   AddToWorkList(NewOp.getNode());
    891 
    892   if (Replace)
    893     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
    894   return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
    895 }
    896 
    897 /// PromoteIntBinOp - Promote the specified integer binary operation if the
    898 /// target indicates it is beneficial. e.g. On x86, it's usually better to
    899 /// promote i16 operations to i32 since i16 instructions are longer.
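         /// For example, on x86 an undesirable (add:i16 x, y) may be rebuilt as
         ///   (truncate:i16 (add:i32 (any_extend:i32 x), (any_extend:i32 y)))
         /// when the target reports i32 as the desirable type.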
    900 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
    901   if (!LegalOperations)
    902     return SDValue();
    903 
    904   EVT VT = Op.getValueType();
    905   if (VT.isVector() || !VT.isInteger())
    906     return SDValue();
    907 
    908   // If operation type is 'undesirable', e.g. i16 on x86, consider
    909   // promoting it.
    910   unsigned Opc = Op.getOpcode();
    911   if (TLI.isTypeDesirableForOp(Opc, VT))
    912     return SDValue();
    913 
    914   EVT PVT = VT;
    915   // Consult target whether it is a good idea to promote this operation and
    916   // what's the right type to promote it to.
    917   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    918     assert(PVT != VT && "Don't know what type to promote to!");
    919 
    920     bool Replace0 = false;
    921     SDValue N0 = Op.getOperand(0);
    922     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
    923     if (!NN0.getNode())
    924       return SDValue();
    925 
    926     bool Replace1 = false;
    927     SDValue N1 = Op.getOperand(1);
    928     SDValue NN1;
    929     if (N0 == N1)
    930       NN1 = NN0;
    931     else {
    932       NN1 = PromoteOperand(N1, PVT, Replace1);
    933       if (!NN1.getNode())
    934         return SDValue();
    935     }
    936 
    937     AddToWorkList(NN0.getNode());
    938     if (NN1.getNode())
    939       AddToWorkList(NN1.getNode());
    940 
    941     if (Replace0)
    942       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    943     if (Replace1)
    944       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    945 
    946     DEBUG(dbgs() << "\nPromoting ";
    947           Op.getNode()->dump(&DAG));
    948     SDLoc dl(Op);
    949     return DAG.getNode(ISD::TRUNCATE, dl, VT,
    950                        DAG.getNode(Opc, dl, PVT, NN0, NN1));
    951   }
    952   return SDValue();
    953 }
    954 
    955 /// PromoteIntShiftOp - Promote the specified integer shift operation if the
    956 /// target indicates it is beneficial. e.g. On x86, it's usually better to
    957 /// promote i16 operations to i32 since i16 instructions are longer.
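         /// The shifted value is sign-extended for SRA and zero-extended for SRL so
         /// the promoted shift computes the same low bits; e.g. an undesirable
         /// (srl:i16 x, c) may become (truncate:i16 (srl:i32 (zero_extend:i32 x), c)).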
    958 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
    959   if (!LegalOperations)
    960     return SDValue();
    961 
    962   EVT VT = Op.getValueType();
    963   if (VT.isVector() || !VT.isInteger())
    964     return SDValue();
    965 
    966   // If operation type is 'undesirable', e.g. i16 on x86, consider
    967   // promoting it.
    968   unsigned Opc = Op.getOpcode();
    969   if (TLI.isTypeDesirableForOp(Opc, VT))
    970     return SDValue();
    971 
    972   EVT PVT = VT;
    973   // Consult target whether it is a good idea to promote this operation and
    974   // what's the right type to promote it to.
    975   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    976     assert(PVT != VT && "Don't know what type to promote to!");
    977 
    978     bool Replace = false;
    979     SDValue N0 = Op.getOperand(0);
    980     if (Opc == ISD::SRA)
    981       N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
    982     else if (Opc == ISD::SRL)
    983       N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
    984     else
    985       N0 = PromoteOperand(N0, PVT, Replace);
    986     if (!N0.getNode())
    987       return SDValue();
    988 
    989     AddToWorkList(N0.getNode());
    990     if (Replace)
    991       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
    992 
    993     DEBUG(dbgs() << "\nPromoting ";
    994           Op.getNode()->dump(&DAG));
    995     SDLoc dl(Op);
    996     return DAG.getNode(ISD::TRUNCATE, dl, VT,
    997                        DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
    998   }
    999   return SDValue();
   1000 }
   1001 
   1002 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
   1003   if (!LegalOperations)
   1004     return SDValue();
   1005 
   1006   EVT VT = Op.getValueType();
   1007   if (VT.isVector() || !VT.isInteger())
   1008     return SDValue();
   1009 
   1010   // If operation type is 'undesirable', e.g. i16 on x86, consider
   1011   // promoting it.
   1012   unsigned Opc = Op.getOpcode();
   1013   if (TLI.isTypeDesirableForOp(Opc, VT))
   1014     return SDValue();
   1015 
   1016   EVT PVT = VT;
   1017   // Consult target whether it is a good idea to promote this operation and
   1018   // what's the right type to promote it to.
   1019   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
   1020     assert(PVT != VT && "Don't know what type to promote to!");
   1021     // fold (aext (aext x)) -> (aext x)
   1022     // fold (aext (zext x)) -> (zext x)
   1023     // fold (aext (sext x)) -> (sext x)
   1024     DEBUG(dbgs() << "\nPromoting ";
   1025           Op.getNode()->dump(&DAG));
   1026     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
   1027   }
   1028   return SDValue();
   1029 }
   1030 
   1031 bool DAGCombiner::PromoteLoad(SDValue Op) {
   1032   if (!LegalOperations)
   1033     return false;
   1034 
   1035   EVT VT = Op.getValueType();
   1036   if (VT.isVector() || !VT.isInteger())
   1037     return false;
   1038 
   1039   // If operation type is 'undesirable', e.g. i16 on x86, consider
   1040   // promoting it.
   1041   unsigned Opc = Op.getOpcode();
   1042   if (TLI.isTypeDesirableForOp(Opc, VT))
   1043     return false;
   1044 
   1045   EVT PVT = VT;
   1046   // Consult target whether it is a good idea to promote this operation and
   1047   // what's the right type to promote it to.
   1048   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
   1049     assert(PVT != VT && "Don't know what type to promote to!");
   1050 
   1051     SDLoc dl(Op);
   1052     SDNode *N = Op.getNode();
   1053     LoadSDNode *LD = cast<LoadSDNode>(N);
   1054     EVT MemVT = LD->getMemoryVT();
   1055     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
   1056       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
   1057                                                   : ISD::EXTLOAD)
   1058       : LD->getExtensionType();
   1059     SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
   1060                                    LD->getChain(), LD->getBasePtr(),
   1061                                    MemVT, LD->getMemOperand());
   1062     SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
   1063 
   1064     DEBUG(dbgs() << "\nPromoting ";
   1065           N->dump(&DAG);
   1066           dbgs() << "\nTo: ";
   1067           Result.getNode()->dump(&DAG);
   1068           dbgs() << '\n');
   1069     WorkListRemover DeadNodes(*this);
   1070     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
   1071     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
   1072     removeFromWorkList(N);
   1073     DAG.DeleteNode(N);
   1074     AddToWorkList(Result.getNode());
   1075     return true;
   1076   }
   1077   return false;
   1078 }
   1079 
   1080 
   1081 //===----------------------------------------------------------------------===//
   1082 //  Main DAG Combiner implementation
   1083 //===----------------------------------------------------------------------===//
   1084 
   1085 void DAGCombiner::Run(CombineLevel AtLevel) {
    1086   // Set the instance variables so that the various visit routines may use them.
   1087   Level = AtLevel;
   1088   LegalOperations = Level >= AfterLegalizeVectorOps;
   1089   LegalTypes = Level >= AfterLegalizeTypes;
   1090 
   1091   // Add all the dag nodes to the worklist.
   1092   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
   1093        E = DAG.allnodes_end(); I != E; ++I)
   1094     AddToWorkList(I);
   1095 
    1096   // Create a dummy node (which is not added to allnodes) that adds a reference
   1097   // to the root node, preventing it from being deleted, and tracking any
   1098   // changes of the root.
   1099   HandleSDNode Dummy(DAG.getRoot());
   1100 
    1101   // The root of the dag may refer to deleted nodes until the dag combiner is
   1102   // done.  Set it to null to avoid confusion.
   1103   DAG.setRoot(SDValue());
   1104 
    1105   // While the worklist isn't empty, find a node and
    1106   // try to combine it.
   1107   while (!WorkListContents.empty()) {
   1108     SDNode *N;
   1109     // The WorkListOrder holds the SDNodes in order, but it may contain
   1110     // duplicates.
   1111     // In order to avoid a linear scan, we use a set (O(log N)) to hold what the
    1112     // worklist *should* contain, and check that the node we want to visit
    1113     // should actually be visited.
   1114     do {
   1115       N = WorkListOrder.pop_back_val();
   1116     } while (!WorkListContents.erase(N));
   1117 
   1118     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
   1119     // N is deleted from the DAG, since they too may now be dead or may have a
   1120     // reduced number of uses, allowing other xforms.
   1121     if (N->use_empty() && N != &Dummy) {
   1122       for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
   1123         AddToWorkList(N->getOperand(i).getNode());
   1124 
   1125       DAG.DeleteNode(N);
   1126       continue;
   1127     }
   1128 
   1129     SDValue RV = combine(N);
   1130 
   1131     if (!RV.getNode())
   1132       continue;
   1133 
   1134     ++NodesCombined;
   1135 
   1136     // If we get back the same node we passed in, rather than a new node or
   1137     // zero, we know that the node must have defined multiple values and
   1138     // CombineTo was used.  Since CombineTo takes care of the worklist
   1139     // mechanics for us, we have no work to do in this case.
   1140     if (RV.getNode() == N)
   1141       continue;
   1142 
   1143     assert(N->getOpcode() != ISD::DELETED_NODE &&
   1144            RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
   1145            "Node was deleted but visit returned new node!");
   1146 
   1147     DEBUG(dbgs() << "\nReplacing.3 ";
   1148           N->dump(&DAG);
   1149           dbgs() << "\nWith: ";
   1150           RV.getNode()->dump(&DAG);
   1151           dbgs() << '\n');
   1152 
   1153     // Transfer debug value.
   1154     DAG.TransferDbgValues(SDValue(N, 0), RV);
   1155     WorkListRemover DeadNodes(*this);
   1156     if (N->getNumValues() == RV.getNode()->getNumValues())
   1157       DAG.ReplaceAllUsesWith(N, RV.getNode());
   1158     else {
   1159       assert(N->getValueType(0) == RV.getValueType() &&
   1160              N->getNumValues() == 1 && "Type mismatch");
   1161       SDValue OpV = RV;
   1162       DAG.ReplaceAllUsesWith(N, &OpV);
   1163     }
   1164 
   1165     // Push the new node and any users onto the worklist
   1166     AddToWorkList(RV.getNode());
   1167     AddUsersToWorkList(RV.getNode());
   1168 
   1169     // Add any uses of the old node to the worklist in case this node is the
   1170     // last one that uses them.  They may become dead after this node is
   1171     // deleted.
   1172     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
   1173       AddToWorkList(N->getOperand(i).getNode());
   1174 
   1175     // Finally, if the node is now dead, remove it from the graph.  The node
   1176     // may not be dead if the replacement process recursively simplified to
   1177     // something else needing this node.
   1178     if (N->use_empty()) {
   1179       // Nodes can be reintroduced into the worklist.  Make sure we do not
   1180       // process a node that has been replaced.
   1181       removeFromWorkList(N);
   1182 
   1183       // Finally, since the node is now dead, remove it from the graph.
   1184       DAG.DeleteNode(N);
   1185     }
   1186   }
   1187 
    1188   // If the root changed (e.g. it was a dead load), update the root.
   1189   DAG.setRoot(Dummy.getValue());
   1190   DAG.RemoveDeadNodes();
   1191 }
   1192 
   1193 SDValue DAGCombiner::visit(SDNode *N) {
   1194   switch (N->getOpcode()) {
   1195   default: break;
   1196   case ISD::TokenFactor:        return visitTokenFactor(N);
   1197   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
   1198   case ISD::ADD:                return visitADD(N);
   1199   case ISD::SUB:                return visitSUB(N);
   1200   case ISD::ADDC:               return visitADDC(N);
   1201   case ISD::SUBC:               return visitSUBC(N);
   1202   case ISD::ADDE:               return visitADDE(N);
   1203   case ISD::SUBE:               return visitSUBE(N);
   1204   case ISD::MUL:                return visitMUL(N);
   1205   case ISD::SDIV:               return visitSDIV(N);
   1206   case ISD::UDIV:               return visitUDIV(N);
   1207   case ISD::SREM:               return visitSREM(N);
   1208   case ISD::UREM:               return visitUREM(N);
   1209   case ISD::MULHU:              return visitMULHU(N);
   1210   case ISD::MULHS:              return visitMULHS(N);
   1211   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
   1212   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
   1213   case ISD::SMULO:              return visitSMULO(N);
   1214   case ISD::UMULO:              return visitUMULO(N);
   1215   case ISD::SDIVREM:            return visitSDIVREM(N);
   1216   case ISD::UDIVREM:            return visitUDIVREM(N);
   1217   case ISD::AND:                return visitAND(N);
   1218   case ISD::OR:                 return visitOR(N);
   1219   case ISD::XOR:                return visitXOR(N);
   1220   case ISD::SHL:                return visitSHL(N);
   1221   case ISD::SRA:                return visitSRA(N);
   1222   case ISD::SRL:                return visitSRL(N);
   1223   case ISD::ROTR:
   1224   case ISD::ROTL:               return visitRotate(N);
   1225   case ISD::CTLZ:               return visitCTLZ(N);
   1226   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
   1227   case ISD::CTTZ:               return visitCTTZ(N);
   1228   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
   1229   case ISD::CTPOP:              return visitCTPOP(N);
   1230   case ISD::SELECT:             return visitSELECT(N);
   1231   case ISD::VSELECT:            return visitVSELECT(N);
   1232   case ISD::SELECT_CC:          return visitSELECT_CC(N);
   1233   case ISD::SETCC:              return visitSETCC(N);
   1234   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
   1235   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
   1236   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
   1237   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
   1238   case ISD::TRUNCATE:           return visitTRUNCATE(N);
   1239   case ISD::BITCAST:            return visitBITCAST(N);
   1240   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
   1241   case ISD::FADD:               return visitFADD(N);
   1242   case ISD::FSUB:               return visitFSUB(N);
   1243   case ISD::FMUL:               return visitFMUL(N);
   1244   case ISD::FMA:                return visitFMA(N);
   1245   case ISD::FDIV:               return visitFDIV(N);
   1246   case ISD::FREM:               return visitFREM(N);
   1247   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
   1248   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
   1249   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
   1250   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
   1251   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
   1252   case ISD::FP_ROUND:           return visitFP_ROUND(N);
   1253   case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
   1254   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
   1255   case ISD::FNEG:               return visitFNEG(N);
   1256   case ISD::FABS:               return visitFABS(N);
   1257   case ISD::FFLOOR:             return visitFFLOOR(N);
   1258   case ISD::FCEIL:              return visitFCEIL(N);
   1259   case ISD::FTRUNC:             return visitFTRUNC(N);
   1260   case ISD::BRCOND:             return visitBRCOND(N);
   1261   case ISD::BR_CC:              return visitBR_CC(N);
   1262   case ISD::LOAD:               return visitLOAD(N);
   1263   case ISD::STORE:              return visitSTORE(N);
   1264   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
   1265   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
   1266   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
   1267   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
   1268   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
   1269   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
   1270   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
   1271   }
   1272   return SDValue();
   1273 }
   1274 
   1275 SDValue DAGCombiner::combine(SDNode *N) {
   1276   SDValue RV = visit(N);
   1277 
   1278   // If nothing happened, try a target-specific DAG combine.
   1279   if (!RV.getNode()) {
   1280     assert(N->getOpcode() != ISD::DELETED_NODE &&
   1281            "Node was deleted but visit returned NULL!");
   1282 
   1283     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
   1284         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
   1285 
   1286       // Expose the DAG combiner to the target combiner impls.
   1287       TargetLowering::DAGCombinerInfo
   1288         DagCombineInfo(DAG, Level, false, this);
   1289 
   1290       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
   1291     }
   1292   }
   1293 
   1294   // If still nothing happened, try promoting the operation.
   1295   if (!RV.getNode()) {
   1296     switch (N->getOpcode()) {
   1297     default: break;
   1298     case ISD::ADD:
   1299     case ISD::SUB:
   1300     case ISD::MUL:
   1301     case ISD::AND:
   1302     case ISD::OR:
   1303     case ISD::XOR:
   1304       RV = PromoteIntBinOp(SDValue(N, 0));
   1305       break;
   1306     case ISD::SHL:
   1307     case ISD::SRA:
   1308     case ISD::SRL:
   1309       RV = PromoteIntShiftOp(SDValue(N, 0));
   1310       break;
   1311     case ISD::SIGN_EXTEND:
   1312     case ISD::ZERO_EXTEND:
   1313     case ISD::ANY_EXTEND:
   1314       RV = PromoteExtend(SDValue(N, 0));
   1315       break;
   1316     case ISD::LOAD:
   1317       if (PromoteLoad(SDValue(N, 0)))
   1318         RV = SDValue(N, 0);
   1319       break;
   1320     }
   1321   }
   1322 
   1323   // If N is a commutative binary node, try commuting it to enable more
   1324   // sdisel CSE.
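          // For example, if (add x, y) is being combined and (add y, x) already
          // exists in the DAG, return the existing node so instruction selection
          // sees a single canonical node.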
   1325   if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
   1326       N->getNumValues() == 1) {
   1327     SDValue N0 = N->getOperand(0);
   1328     SDValue N1 = N->getOperand(1);
   1329 
   1330     // Constant operands are canonicalized to RHS.
   1331     if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
   1332       SDValue Ops[] = {N1, N0};
   1333       SDNode *CSENode;
   1334       if (const BinaryWithFlagsSDNode *BinNode =
   1335               dyn_cast<BinaryWithFlagsSDNode>(N)) {
   1336         CSENode = DAG.getNodeIfExists(
   1337             N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(),
   1338             BinNode->hasNoSignedWrap(), BinNode->isExact());
   1339       } else {
   1340         CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
   1341       }
   1342       if (CSENode)
   1343         return SDValue(CSENode, 0);
   1344     }
   1345   }
   1346 
   1347   return RV;
   1348 }
   1349 
   1350 /// getInputChainForNode - Given a node, return its input chain if it has one,
   1351 /// otherwise return a null SDValue.
   1352 static SDValue getInputChainForNode(SDNode *N) {
   1353   if (unsigned NumOps = N->getNumOperands()) {
   1354     if (N->getOperand(0).getValueType() == MVT::Other)
   1355       return N->getOperand(0);
   1356     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
   1357       return N->getOperand(NumOps-1);
   1358     for (unsigned i = 1; i < NumOps-1; ++i)
   1359       if (N->getOperand(i).getValueType() == MVT::Other)
   1360         return N->getOperand(i);
   1361   }
   1362   return SDValue();
   1363 }
   1364 
   1365 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
   1366   // If N has two operands, where one has an input chain equal to the other,
   1367   // the 'other' chain is redundant.
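          // For example, TokenFactor(X, Ch) where X's own input chain is Ch
          // reduces to X, since X already orders itself after Ch.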
   1368   if (N->getNumOperands() == 2) {
   1369     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
   1370       return N->getOperand(0);
   1371     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
   1372       return N->getOperand(1);
   1373   }
   1374 
   1375   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
   1376   SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
   1377   SmallPtrSet<SDNode*, 16> SeenOps;
   1378   bool Changed = false;             // If we should replace this token factor.
   1379 
   1380   // Start out with this token factor.
   1381   TFs.push_back(N);
   1382 
   1383   // Iterate through token factors.  The TFs list grows as new token factors
   1384   // are encountered.
   1385   for (unsigned i = 0; i < TFs.size(); ++i) {
   1386     SDNode *TF = TFs[i];
   1387 
   1388     // Check each of the operands.
   1389     for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
   1390       SDValue Op = TF->getOperand(i);
   1391 
   1392       switch (Op.getOpcode()) {
   1393       case ISD::EntryToken:
   1394         // Entry tokens don't need to be added to the list. They are
   1395         // redundant.
   1396         Changed = true;
   1397         break;
   1398 
   1399       case ISD::TokenFactor:
   1400         if (Op.hasOneUse() &&
   1401             std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
   1402           // Queue up for processing.
   1403           TFs.push_back(Op.getNode());
   1404           // Clean up in case the token factor is removed.
   1405           AddToWorkList(Op.getNode());
   1406           Changed = true;
   1407           break;
   1408         }
   1409         // Fall thru
   1410 
   1411       default:
   1412         // Only add if it isn't already in the list.
   1413         if (SeenOps.insert(Op.getNode()))
   1414           Ops.push_back(Op);
   1415         else
   1416           Changed = true;
   1417         break;
   1418       }
   1419     }
   1420   }
   1421 
   1422   SDValue Result;
   1423 
   1424   // If we've changed things around, replace the token factor.
   1425   if (Changed) {
   1426     if (Ops.empty()) {
   1427       // The entry token is the only possible outcome.
   1428       Result = DAG.getEntryNode();
   1429     } else {
   1430       // New and improved token factor.
   1431       Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
   1432     }
   1433 
   1434     // Don't add users to work list.
   1435     return CombineTo(N, Result, false);
   1436   }
   1437 
   1438   return Result;
   1439 }
   1440 
   1441 /// MERGE_VALUES can always be eliminated.
   1442 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
   1443   WorkListRemover DeadNodes(*this);
   1444   // Replacing results may cause a different MERGE_VALUES to suddenly
   1445   // be CSE'd with N, and carry its uses with it. Iterate until no
   1446   // uses remain, to ensure that the node can be safely deleted.
   1447   // First add the users of this node to the work list so that they
   1448   // can be tried again once they have new operands.
   1449   AddUsersToWorkList(N);
   1450   do {
   1451     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
   1452       DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
   1453   } while (!N->use_empty());
   1454   removeFromWorkList(N);
   1455   DAG.DeleteNode(N);
   1456   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   1457 }
   1458 
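        /// combineShlAddConstant - Try to fold (add (shl (add x, c1), c2), y) into
        /// (add (add (shl x, c2), c1 << c2), y), distributing the shift over the
        /// inner add so the two constants can be folded later.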
   1459 static
   1460 SDValue combineShlAddConstant(SDLoc DL, SDValue N0, SDValue N1,
   1461                               SelectionDAG &DAG) {
   1462   EVT VT = N0.getValueType();
   1463   SDValue N00 = N0.getOperand(0);
   1464   SDValue N01 = N0.getOperand(1);
   1465   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
   1466 
   1467   if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
   1468       isa<ConstantSDNode>(N00.getOperand(1))) {
   1469     // fold (add (shl (add x, c1), c2), y) -> (add (add (shl x, c2), c1<<c2), y)
   1470     N0 = DAG.getNode(ISD::ADD, SDLoc(N0), VT,
   1471                      DAG.getNode(ISD::SHL, SDLoc(N00), VT,
   1472                                  N00.getOperand(0), N01),
   1473                      DAG.getNode(ISD::SHL, SDLoc(N01), VT,
   1474                                  N00.getOperand(1), N01));
   1475     return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
   1476   }
   1477 
   1478   return SDValue();
   1479 }
   1480 
   1481 SDValue DAGCombiner::visitADD(SDNode *N) {
   1482   SDValue N0 = N->getOperand(0);
   1483   SDValue N1 = N->getOperand(1);
   1484   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   1485   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   1486   EVT VT = N0.getValueType();
   1487 
   1488   // fold vector ops
   1489   if (VT.isVector()) {
   1490     SDValue FoldedVOp = SimplifyVBinOp(N);
   1491     if (FoldedVOp.getNode()) return FoldedVOp;
   1492 
   1493     // fold (add x, 0) -> x, vector edition
   1494     if (ISD::isBuildVectorAllZeros(N1.getNode()))
   1495       return N0;
   1496     if (ISD::isBuildVectorAllZeros(N0.getNode()))
   1497       return N1;
   1498   }
   1499 
   1500   // fold (add x, undef) -> undef
   1501   if (N0.getOpcode() == ISD::UNDEF)
   1502     return N0;
   1503   if (N1.getOpcode() == ISD::UNDEF)
   1504     return N1;
   1505   // fold (add c1, c2) -> c1+c2
   1506   if (N0C && N1C)
   1507     return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
   1508   // canonicalize constant to RHS
   1509   if (N0C && !N1C)
   1510     return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
   1511   // fold (add x, 0) -> x
   1512   if (N1C && N1C->isNullValue())
   1513     return N0;
   1514   // fold (add Sym, c) -> Sym+c
   1515   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
   1516     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
   1517         GA->getOpcode() == ISD::GlobalAddress)
   1518       return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
   1519                                   GA->getOffset() +
   1520                                     (uint64_t)N1C->getSExtValue());
   1521   // fold ((c1-A)+c2) -> (c1+c2)-A
   1522   if (N1C && N0.getOpcode() == ISD::SUB)
   1523     if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
   1524       return DAG.getNode(ISD::SUB, SDLoc(N), VT,
   1525                          DAG.getConstant(N1C->getAPIntValue()+
   1526                                          N0C->getAPIntValue(), VT),
   1527                          N0.getOperand(1));
   1528   // reassociate add
   1529   SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1);
   1530   if (RADD.getNode())
   1531     return RADD;
   1532   // fold ((0-A) + B) -> B-A
   1533   if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
   1534       cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
   1535     return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
   1536   // fold (A + (0-B)) -> A-B
   1537   if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
   1538       cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
   1539     return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
   1540   // fold (A+(B-A)) -> B
   1541   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
   1542     return N1.getOperand(0);
   1543   // fold ((B-A)+A) -> B
   1544   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
   1545     return N0.getOperand(0);
   1546   // fold (A+(B-(A+C))) to (B-C)
   1547   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
   1548       N0 == N1.getOperand(1).getOperand(0))
   1549     return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
   1550                        N1.getOperand(1).getOperand(1));
   1551   // fold (A+(B-(C+A))) to (B-C)
   1552   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
   1553       N0 == N1.getOperand(1).getOperand(1))
   1554     return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
   1555                        N1.getOperand(1).getOperand(0));
   1556   // fold (A+((B-A)+or-C)) to (B+or-C)
   1557   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
   1558       N1.getOperand(0).getOpcode() == ISD::SUB &&
   1559       N0 == N1.getOperand(0).getOperand(1))
   1560     return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
   1561                        N1.getOperand(0).getOperand(0), N1.getOperand(1));
   1562 
   1563   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
   1564   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
   1565     SDValue N00 = N0.getOperand(0);
   1566     SDValue N01 = N0.getOperand(1);
   1567     SDValue N10 = N1.getOperand(0);
   1568     SDValue N11 = N1.getOperand(1);
   1569 
   1570     if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
   1571       return DAG.getNode(ISD::SUB, SDLoc(N), VT,
   1572                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
   1573                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
   1574   }
   1575 
   1576   if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
   1577     return SDValue(N, 0);
   1578 
   1579   // fold (a+b) -> (a|b) iff a and b share no bits.
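          // For example, with a = 0b0100 and b = 0b0011 no bit position is set in
          // both values, so the addition produces no carries and a + b == a | b.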
   1580   if (VT.isInteger() && !VT.isVector()) {
   1581     APInt LHSZero, LHSOne;
   1582     APInt RHSZero, RHSOne;
   1583     DAG.computeKnownBits(N0, LHSZero, LHSOne);
   1584 
   1585     if (LHSZero.getBoolValue()) {
   1586       DAG.computeKnownBits(N1, RHSZero, RHSOne);
   1587 
   1588       // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
   1589       // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
   1590       if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){
   1591         if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT))
   1592           return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
   1593       }
   1594     }
   1595   }
   1596 
   1597   // fold (add (shl (add x, c1), c2), y) -> (add (add (shl x, c2), c1<<c2), y)
   1598   if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
   1599     SDValue Result = combineShlAddConstant(SDLoc(N), N0, N1, DAG);
   1600     if (Result.getNode()) return Result;
   1601   }
   1602   if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
   1603     SDValue Result = combineShlAddConstant(SDLoc(N), N1, N0, DAG);
   1604     if (Result.getNode()) return Result;
   1605   }
   1606 
   1607   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
   1608   if (N1.getOpcode() == ISD::SHL &&
   1609       N1.getOperand(0).getOpcode() == ISD::SUB)
   1610     if (ConstantSDNode *C =
   1611           dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
   1612       if (C->getAPIntValue() == 0)
   1613         return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
   1614                            DAG.getNode(ISD::SHL, SDLoc(N), VT,
   1615                                        N1.getOperand(0).getOperand(1),
   1616                                        N1.getOperand(1)));
   1617   if (N0.getOpcode() == ISD::SHL &&
   1618       N0.getOperand(0).getOpcode() == ISD::SUB)
   1619     if (ConstantSDNode *C =
   1620           dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
   1621       if (C->getAPIntValue() == 0)
   1622         return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
   1623                            DAG.getNode(ISD::SHL, SDLoc(N), VT,
   1624                                        N0.getOperand(0).getOperand(1),
   1625                                        N0.getOperand(1)));
   1626 
   1627   if (N1.getOpcode() == ISD::AND) {
   1628     SDValue AndOp0 = N1.getOperand(0);
   1629     ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
   1630     unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
   1631     unsigned DestBits = VT.getScalarType().getSizeInBits();
   1632 
   1633     // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
   1634     // and similar xforms where the inner op is either ~0 or 0.
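            // When AndOp0 is known to be all-ones or all-zeros (NumSignBits equals
            // the destination bit width), (and AndOp0, 1) is 1 exactly when AndOp0
            // is -1, so adding the masked value equals subtracting AndOp0 itself.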
   1635     if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
   1636       SDLoc DL(N);
   1637       return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
   1638     }
   1639   }
   1640 
   1641   // add (sext i1), X -> sub X, (zext i1)
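          // A sign-extended i1 is 0 or -1 while a zero-extended i1 is 0 or 1, so
          // adding the sign extension is the same as subtracting the zero extension.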
   1642   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
   1643       N0.getOperand(0).getValueType() == MVT::i1 &&
   1644       !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
   1645     SDLoc DL(N);
   1646     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
   1647     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
   1648   }
   1649 
   1650   return SDValue();
   1651 }
   1652 
   1653 SDValue DAGCombiner::visitADDC(SDNode *N) {
   1654   SDValue N0 = N->getOperand(0);
   1655   SDValue N1 = N->getOperand(1);
   1656   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   1657   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   1658   EVT VT = N0.getValueType();
   1659 
   1660   // If the flag result is dead, turn this into an ADD.
   1661   if (!N->hasAnyUseOfValue(1))
   1662     return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
   1663                      DAG.getNode(ISD::CARRY_FALSE,
   1664                                  SDLoc(N), MVT::Glue));
   1665 
   1666   // canonicalize constant to RHS.
   1667   if (N0C && !N1C)
   1668     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
   1669 
   1670   // fold (addc x, 0) -> x + no carry out
   1671   if (N1C && N1C->isNullValue())
   1672     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
   1673                                         SDLoc(N), MVT::Glue));
   1674 
   1675   // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
   1676   APInt LHSZero, LHSOne;
   1677   APInt RHSZero, RHSOne;
   1678   DAG.computeKnownBits(N0, LHSZero, LHSOne);
   1679 
   1680   if (LHSZero.getBoolValue()) {
   1681     DAG.computeKnownBits(N1, RHSZero, RHSOne);
   1682 
   1683     // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
   1684     // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
   1685     if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
   1686       return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
   1687                        DAG.getNode(ISD::CARRY_FALSE,
   1688                                    SDLoc(N), MVT::Glue));
   1689   }
   1690 
   1691   return SDValue();
   1692 }
   1693 
   1694 SDValue DAGCombiner::visitADDE(SDNode *N) {
   1695   SDValue N0 = N->getOperand(0);
   1696   SDValue N1 = N->getOperand(1);
   1697   SDValue CarryIn = N->getOperand(2);
   1698   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   1699   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   1700 
   1701   // canonicalize constant to RHS
   1702   if (N0C && !N1C)
   1703     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
   1704                        N1, N0, CarryIn);
   1705 
   1706   // fold (adde x, y, false) -> (addc x, y)
   1707   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
   1708     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
   1709 
   1710   return SDValue();
   1711 }
   1712 
   1713 // Since it may not be valid to emit a fold to zero for vector initializers,
   1714 // check whether we can before folding.
   1715 static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
   1716                              SelectionDAG &DAG,
   1717                              bool LegalOperations, bool LegalTypes) {
   1718   if (!VT.isVector())
   1719     return DAG.getConstant(0, VT);
   1720   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
   1721     return DAG.getConstant(0, VT);
   1722   return SDValue();
   1723 }
   1724 
   1725 SDValue DAGCombiner::visitSUB(SDNode *N) {
   1726   SDValue N0 = N->getOperand(0);
   1727   SDValue N1 = N->getOperand(1);
   1728   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
   1729   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
   1730   ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
   1731     dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
   1732   EVT VT = N0.getValueType();
   1733 
   1734   // fold vector ops
   1735   if (VT.isVector()) {
   1736     SDValue FoldedVOp = SimplifyVBinOp(N);
   1737     if (FoldedVOp.getNode()) return FoldedVOp;
   1738 
   1739     // fold (sub x, 0) -> x, vector edition
   1740     if (ISD::isBuildVectorAllZeros(N1.getNode()))
   1741       return N0;
   1742   }
   1743 
   1744   // fold (sub x, x) -> 0
   1745   // FIXME: Refactor this and xor and other similar operations together.
   1746   if (N0 == N1)
   1747     return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
   1748   // fold (sub c1, c2) -> c1-c2
   1749   if (N0C && N1C)
   1750     return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
   1751   // fold (sub x, c) -> (add x, -c)
   1752   if (N1C)
   1753     return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0,
   1754                        DAG.getConstant(-N1C->getAPIntValue(), VT));
   1755   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
   1756   if (N0C && N0C->isAllOnesValue())
   1757     return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
   1758   // fold A-(A-B) -> B
   1759   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
   1760     return N1.getOperand(1);
   1761   // fold (A+B)-A -> B
   1762   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
   1763     return N0.getOperand(1);
   1764   // fold (A+B)-B -> A
   1765   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
   1766     return N0.getOperand(0);
   1767   // fold C2-(A+C1) -> (C2-C1)-A
   1768   if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
   1769     SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
   1770                                    VT);
   1771     return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC,
   1772                        N1.getOperand(0));
   1773   }
   1774   // fold ((A+(B+or-C))-B) -> A+or-C
   1775   if (N0.getOpcode() == ISD::ADD &&
   1776       (N0.getOperand(1).getOpcode() == ISD::SUB ||
   1777        N0.getOperand(1).getOpcode() == ISD::ADD) &&
   1778       N0.getOperand(1).getOperand(0) == N1)
   1779     return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
   1780                        N0.getOperand(0), N0.getOperand(1).getOperand(1));
   1781   // fold ((A+(C+B))-B) -> A+C
   1782   if (N0.getOpcode() == ISD::ADD &&
   1783       N0.getOperand(1).getOpcode() == ISD::ADD &&
   1784       N0.getOperand(1).getOperand(1) == N1)
   1785     return DAG.getNode(ISD::ADD, SDLoc(N), VT,
   1786                        N0.getOperand(0), N0.getOperand(1).getOperand(0));
   1787   // fold ((A-(B-C))-C) -> A-B
   1788   if (N0.getOpcode() == ISD::SUB &&
   1789       N0.getOperand(1).getOpcode() == ISD::SUB &&
   1790       N0.getOperand(1).getOperand(1) == N1)
   1791     return DAG.getNode(ISD::SUB, SDLoc(N), VT,
   1792                        N0.getOperand(0), N0.getOperand(1).getOperand(0));
   1793 
   1794   // If either operand of a sub is undef, the result is undef
   1795   if (N0.getOpcode() == ISD::UNDEF)
   1796     return N0;
   1797   if (N1.getOpcode() == ISD::UNDEF)
   1798     return N1;
   1799 
   1800   // If the relocation model supports it, consider symbol offsets.
   1801   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
   1802     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
   1803       // fold (sub Sym, c) -> Sym-c
   1804       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
   1805         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
   1806                                     GA->getOffset() -
   1807                                       (uint64_t)N1C->getSExtValue());
   1808       // fold (sub Sym+c1, Sym+c2) -> c1-c2
   1809       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
   1810         if (GA->getGlobal() == GB->getGlobal())
   1811           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
   1812                                  VT);
   1813     }
   1814 
   1815   return SDValue();
   1816 }
   1817 
   1818 SDValue DAGCombiner::visitSUBC(SDNode *N) {
   1819   SDValue N0 = N->getOperand(0);
   1820   SDValue N1 = N->getOperand(1);
   1821   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   1822   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   1823   EVT VT = N0.getValueType();
   1824 
   1825   // If the flag result is dead, turn this into a SUB.
   1826   if (!N->hasAnyUseOfValue(1))
   1827     return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1),
   1828                      DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
   1829                                  MVT::Glue));
   1830 
   1831   // fold (subc x, x) -> 0 + no borrow
   1832   if (N0 == N1)
   1833     return CombineTo(N, DAG.getConstant(0, VT),
   1834                      DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
   1835                                  MVT::Glue));
   1836 
   1837   // fold (subc x, 0) -> x + no borrow
   1838   if (N1C && N1C->isNullValue())
   1839     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
   1840                                         MVT::Glue));
   1841 
   1842   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
   1843   if (N0C && N0C->isAllOnesValue())
   1844     return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
   1845                      DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
   1846                                  MVT::Glue));
   1847 
   1848   return SDValue();
   1849 }
   1850 
   1851 SDValue DAGCombiner::visitSUBE(SDNode *N) {
   1852   SDValue N0 = N->getOperand(0);
   1853   SDValue N1 = N->getOperand(1);
   1854   SDValue CarryIn = N->getOperand(2);
   1855 
   1856   // fold (sube x, y, false) -> (subc x, y)
   1857   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
   1858     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
   1859 
   1860   return SDValue();
   1861 }
   1862 
   1863 SDValue DAGCombiner::visitMUL(SDNode *N) {
   1864   SDValue N0 = N->getOperand(0);
   1865   SDValue N1 = N->getOperand(1);
   1866   EVT VT = N0.getValueType();
   1867 
   1868   // fold (mul x, undef) -> 0
   1869   if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
   1870     return DAG.getConstant(0, VT);
   1871 
   1872   bool N0IsConst = false;
   1873   bool N1IsConst = false;
   1874   APInt ConstValue0, ConstValue1;
   1875   // fold vector ops
   1876   if (VT.isVector()) {
   1877     SDValue FoldedVOp = SimplifyVBinOp(N);
   1878     if (FoldedVOp.getNode()) return FoldedVOp;
   1879 
   1880     N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
   1881     N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
   1882   } else {
   1883     N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr;
   1884     ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue()
   1885                             : APInt();
   1886     N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr;
   1887     ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue()
   1888                             : APInt();
   1889   }
   1890 
   1891   // fold (mul c1, c2) -> c1*c2
   1892   if (N0IsConst && N1IsConst)
   1893     return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode());
   1894 
   1895   // canonicalize constant to RHS
   1896   if (N0IsConst && !N1IsConst)
   1897     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
   1898   // fold (mul x, 0) -> 0
   1899   if (N1IsConst && ConstValue1 == 0)
   1900     return N1;
   1901   // We require a splat of the entire scalar bit width for non-contiguous
   1902   // bit patterns.
   1903   bool IsFullSplat =
   1904     ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
   1905   // fold (mul x, 1) -> x
   1906   if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
   1907     return N0;
   1908   // fold (mul x, -1) -> 0-x
   1909   if (N1IsConst && ConstValue1.isAllOnesValue())
   1910     return DAG.getNode(ISD::SUB, SDLoc(N), VT,
   1911                        DAG.getConstant(0, VT), N0);
   1912   // fold (mul x, (1 << c)) -> x << c
   1913   if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat)
   1914     return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
   1915                        DAG.getConstant(ConstValue1.logBase2(),
   1916                                        getShiftAmountTy(N0.getValueType())));
   1917   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
   1918   if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) {
   1919     unsigned Log2Val = (-ConstValue1).logBase2();
   1920     // FIXME: If the input is something that is easily negated (e.g. a
   1921     // single-use add), we should put the negate there.
   1922     return DAG.getNode(ISD::SUB, SDLoc(N), VT,
   1923                        DAG.getConstant(0, VT),
   1924                        DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
   1925                             DAG.getConstant(Log2Val,
   1926                                       getShiftAmountTy(N0.getValueType()))));
   1927   }
   1928 
   1929   APInt Val;
   1930   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
   1931   if (N1IsConst && N0.getOpcode() == ISD::SHL &&
   1932       (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
   1933                      isa<ConstantSDNode>(N0.getOperand(1)))) {
   1934     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
   1935                              N1, N0.getOperand(1));
   1936     AddToWorkList(C3.getNode());
   1937     return DAG.getNode(ISD::MUL, SDLoc(N), VT,
   1938                        N0.getOperand(0), C3);
   1939   }
   1940 
   1941   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
   1942   // use.
   1943   {
   1944     SDValue Sh(nullptr,0), Y(nullptr,0);
   1945     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
   1946     if (N0.getOpcode() == ISD::SHL &&
   1947         (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
   1948                        isa<ConstantSDNode>(N0.getOperand(1))) &&
   1949         N0.getNode()->hasOneUse()) {
   1950       Sh = N0; Y = N1;
   1951     } else if (N1.getOpcode() == ISD::SHL &&
   1952                isa<ConstantSDNode>(N1.getOperand(1)) &&
   1953                N1.getNode()->hasOneUse()) {
   1954       Sh = N1; Y = N0;
   1955     }
   1956 
   1957     if (Sh.getNode()) {
   1958       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
   1959                                 Sh.getOperand(0), Y);
   1960       return DAG.getNode(ISD::SHL, SDLoc(N), VT,
   1961                          Mul, Sh.getOperand(1));
   1962     }
   1963   }
   1964 
   1965   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
   1966   if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
   1967       (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
   1968                      isa<ConstantSDNode>(N0.getOperand(1))))
   1969     return DAG.getNode(ISD::ADD, SDLoc(N), VT,
   1970                        DAG.getNode(ISD::MUL, SDLoc(N0), VT,
   1971                                    N0.getOperand(0), N1),
   1972                        DAG.getNode(ISD::MUL, SDLoc(N1), VT,
   1973                                    N0.getOperand(1), N1));
   1974 
   1975   // reassociate mul
   1976   SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1);
   1977   if (RMUL.getNode())
   1978     return RMUL;
   1979 
   1980   return SDValue();
   1981 }
   1982 
   1983 SDValue DAGCombiner::visitSDIV(SDNode *N) {
   1984   SDValue N0 = N->getOperand(0);
   1985   SDValue N1 = N->getOperand(1);
   1986   ConstantSDNode *N0C = isConstOrConstSplat(N0);
   1987   ConstantSDNode *N1C = isConstOrConstSplat(N1);
   1988   EVT VT = N->getValueType(0);
   1989 
   1990   // fold vector ops
   1991   if (VT.isVector()) {
   1992     SDValue FoldedVOp = SimplifyVBinOp(N);
   1993     if (FoldedVOp.getNode()) return FoldedVOp;
   1994   }
   1995 
   1996   // fold (sdiv c1, c2) -> c1/c2
   1997   if (N0C && N1C && !N1C->isNullValue())
   1998     return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
   1999   // fold (sdiv X, 1) -> X
   2000   if (N1C && N1C->getAPIntValue() == 1LL)
   2001     return N0;
   2002   // fold (sdiv X, -1) -> 0-X
   2003   if (N1C && N1C->isAllOnesValue())
   2004     return DAG.getNode(ISD::SUB, SDLoc(N), VT,
   2005                        DAG.getConstant(0, VT), N0);
   2006   // If we know the sign bits of both operands are zero, strength reduce to a
   2007   // udiv instead.  Handles (X&15) /s 4 -> (X&15) >> 2
   2008   if (!VT.isVector()) {
   2009     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
   2010       return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(),
   2011                          N0, N1);
   2012   }
   2013 
   2014   // fold (sdiv X, pow2) -> simple ops after legalize
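          // For a 32-bit X and a divisor of 4 (lg2 == 2) the emitted sequence is:
          //   SGN = X >>s 31        ; splat the sign bit
          //   SRL = SGN >>u 30      ; 3 if X is negative, 0 otherwise
          //   ADD = X + SRL         ; bias negative values toward zero
          //   SRA = ADD >>s 2       ; the quotient (negated below if the divisor < 0)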
   2015   if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() ||
   2016                                      (-N1C->getAPIntValue()).isPowerOf2())) {
   2017     // If dividing by powers of two is cheap, then don't perform the following
   2018     // fold.
   2019     if (TLI.isPow2DivCheap())
   2020       return SDValue();
   2021 
   2022     unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
   2023 
   2024     // Splat the sign bit into the register
   2025     SDValue SGN =
   2026         DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
   2027                     DAG.getConstant(VT.getScalarSizeInBits() - 1,
   2028                                     getShiftAmountTy(N0.getValueType())));
   2029     AddToWorkList(SGN.getNode());
   2030 
   2031     // Add (N0 < 0) ? (1 << lg2) - 1 : 0 so the final shift rounds toward zero.
   2032     SDValue SRL =
   2033         DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
   2034                     DAG.getConstant(VT.getScalarSizeInBits() - lg2,
   2035                                     getShiftAmountTy(SGN.getValueType())));
   2036     SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL);
   2037     AddToWorkList(SRL.getNode());
   2038     AddToWorkList(ADD.getNode());    // Divide by pow2
   2039     SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD,
   2040                   DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));
   2041 
   2042     // If we're dividing by a positive value, we're done.  Otherwise, we must
   2043     // negate the result.
   2044     if (N1C->getAPIntValue().isNonNegative())
   2045       return SRA;
   2046 
   2047     AddToWorkList(SRA.getNode());
   2048     return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA);
   2049   }
   2050 
   2051   // if integer divide is expensive and we satisfy the requirements, emit an
   2052   // alternate sequence.
   2053   if (N1C && !TLI.isIntDivCheap()) {
   2054     SDValue Op = BuildSDIV(N);
   2055     if (Op.getNode()) return Op;
   2056   }
   2057 
   2058   // undef / X -> 0
   2059   if (N0.getOpcode() == ISD::UNDEF)
   2060     return DAG.getConstant(0, VT);
   2061   // X / undef -> undef
   2062   if (N1.getOpcode() == ISD::UNDEF)
   2063     return N1;
   2064 
   2065   return SDValue();
   2066 }
   2067 
   2068 SDValue DAGCombiner::visitUDIV(SDNode *N) {
   2069   SDValue N0 = N->getOperand(0);
   2070   SDValue N1 = N->getOperand(1);
   2071   ConstantSDNode *N0C = isConstOrConstSplat(N0);
   2072   ConstantSDNode *N1C = isConstOrConstSplat(N1);
   2073   EVT VT = N->getValueType(0);
   2074 
   2075   // fold vector ops
   2076   if (VT.isVector()) {
   2077     SDValue FoldedVOp = SimplifyVBinOp(N);
   2078     if (FoldedVOp.getNode()) return FoldedVOp;
   2079   }
   2080 
   2081   // fold (udiv c1, c2) -> c1/c2
   2082   if (N0C && N1C && !N1C->isNullValue())
   2083     return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
   2084   // fold (udiv x, (1 << c)) -> x >>u c
   2085   if (N1C && N1C->getAPIntValue().isPowerOf2())
   2086     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
   2087                        DAG.getConstant(N1C->getAPIntValue().logBase2(),
   2088                                        getShiftAmountTy(N0.getValueType())));
   2089   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
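          // For example, (udiv x, (shl 4, y)) becomes (srl x, (add y, 2)).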
   2090   if (N1.getOpcode() == ISD::SHL) {
   2091     if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
   2092       if (SHC->getAPIntValue().isPowerOf2()) {
   2093         EVT ADDVT = N1.getOperand(1).getValueType();
   2094         SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT,
   2095                                   N1.getOperand(1),
   2096                                   DAG.getConstant(SHC->getAPIntValue()
   2097                                                                   .logBase2(),
   2098                                                   ADDVT));
   2099         AddToWorkList(Add.getNode());
   2100         return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add);
   2101       }
   2102     }
   2103   }
   2104   // fold (udiv x, c) -> alternate
   2105   if (N1C && !TLI.isIntDivCheap()) {
   2106     SDValue Op = BuildUDIV(N);
   2107     if (Op.getNode()) return Op;
   2108   }
   2109 
   2110   // undef / X -> 0
   2111   if (N0.getOpcode() == ISD::UNDEF)
   2112     return DAG.getConstant(0, VT);
   2113   // X / undef -> undef
   2114   if (N1.getOpcode() == ISD::UNDEF)
   2115     return N1;
   2116 
   2117   return SDValue();
   2118 }
   2119 
   2120 SDValue DAGCombiner::visitSREM(SDNode *N) {
   2121   SDValue N0 = N->getOperand(0);
   2122   SDValue N1 = N->getOperand(1);
   2123   ConstantSDNode *N0C = isConstOrConstSplat(N0);
   2124   ConstantSDNode *N1C = isConstOrConstSplat(N1);
   2125   EVT VT = N->getValueType(0);
   2126 
   2127   // fold (srem c1, c2) -> c1%c2
   2128   if (N0C && N1C && !N1C->isNullValue())
   2129     return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
   2130   // If we know the sign bits of both operands are zero, strength reduce to a
   2131   // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
   2132   if (!VT.isVector()) {
   2133     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
   2134       return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1);
   2135   }
   2136 
   2137   // If X/C can be simplified by the division-by-constant logic, lower
   2138   // X%C to the equivalent of X-X/C*C.
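          // That is, X % C == X - (X / C) * C, so a cheap expansion of the division
          // also yields a cheap remainder.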
   2139   if (N1C && !N1C->isNullValue()) {
   2140     SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1);
   2141     AddToWorkList(Div.getNode());
   2142     SDValue OptimizedDiv = combine(Div.getNode());
   2143     if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
   2144       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
   2145                                 OptimizedDiv, N1);
   2146       SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
   2147       AddToWorkList(Mul.getNode());
   2148       return Sub;
   2149     }
   2150   }
   2151 
   2152   // undef % X -> 0
   2153   if (N0.getOpcode() == ISD::UNDEF)
   2154     return DAG.getConstant(0, VT);
   2155   // X % undef -> undef
   2156   if (N1.getOpcode() == ISD::UNDEF)
   2157     return N1;
   2158 
   2159   return SDValue();
   2160 }
   2161 
   2162 SDValue DAGCombiner::visitUREM(SDNode *N) {
   2163   SDValue N0 = N->getOperand(0);
   2164   SDValue N1 = N->getOperand(1);
   2165   ConstantSDNode *N0C = isConstOrConstSplat(N0);
   2166   ConstantSDNode *N1C = isConstOrConstSplat(N1);
   2167   EVT VT = N->getValueType(0);
   2168 
   2169   // fold (urem c1, c2) -> c1%c2
   2170   if (N0C && N1C && !N1C->isNullValue())
   2171     return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
   2172   // fold (urem x, pow2) -> (and x, pow2-1)
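          // For example, (urem x, 8) becomes (and x, 7).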
   2173   if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
   2174     return DAG.getNode(ISD::AND, SDLoc(N), VT, N0,
   2175                        DAG.getConstant(N1C->getAPIntValue()-1,VT));
   2176   // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
   2177   if (N1.getOpcode() == ISD::SHL) {
   2178     if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
   2179       if (SHC->getAPIntValue().isPowerOf2()) {
   2180         SDValue Add =
   2181           DAG.getNode(ISD::ADD, SDLoc(N), VT, N1,
   2182                  DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
   2183                                  VT));
   2184         AddToWorkList(Add.getNode());
   2185         return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add);
   2186       }
   2187     }
   2188   }
   2189 
   2190   // If X/C can be simplified by the division-by-constant logic, lower
   2191   // X%C to the equivalent of X-X/C*C.
   2192   if (N1C && !N1C->isNullValue()) {
   2193     SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1);
   2194     AddToWorkList(Div.getNode());
   2195     SDValue OptimizedDiv = combine(Div.getNode());
   2196     if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
   2197       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
   2198                                 OptimizedDiv, N1);
   2199       SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
   2200       AddToWorkList(Mul.getNode());
   2201       return Sub;
   2202     }
   2203   }
   2204 
   2205   // undef % X -> 0
   2206   if (N0.getOpcode() == ISD::UNDEF)
   2207     return DAG.getConstant(0, VT);
   2208   // X % undef -> undef
   2209   if (N1.getOpcode() == ISD::UNDEF)
   2210     return N1;
   2211 
   2212   return SDValue();
   2213 }
   2214 
   2215 SDValue DAGCombiner::visitMULHS(SDNode *N) {
   2216   SDValue N0 = N->getOperand(0);
   2217   SDValue N1 = N->getOperand(1);
   2218   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   2219   EVT VT = N->getValueType(0);
   2220   SDLoc DL(N);
   2221 
   2222   // fold (mulhs x, 0) -> 0
   2223   if (N1C && N1C->isNullValue())
   2224     return N1;
   2225   // fold (mulhs x, 1) -> (sra x, size(x)-1)
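          // The high half of x * 1 is just the sign extension of x, i.e. every bit
          // is a copy of x's sign bit, which is exactly what the arithmetic shift
          // produces.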
   2226   if (N1C && N1C->getAPIntValue() == 1)
   2227     return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0,
   2228                        DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
   2229                                        getShiftAmountTy(N0.getValueType())));
   2230   // fold (mulhs x, undef) -> 0
   2231   if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
   2232     return DAG.getConstant(0, VT);
   2233 
   2234   // If the type twice as wide is legal, transform the mulhs to a wider multiply
   2235   // plus a shift.
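          // For example, an i32 mulhs becomes: sign-extend both operands to i64,
          // multiply, shift the product right by 32, and truncate back to i32.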
   2236   if (VT.isSimple() && !VT.isVector()) {
   2237     MVT Simple = VT.getSimpleVT();
   2238     unsigned SimpleSize = Simple.getSizeInBits();
   2239     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
   2240     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
   2241       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
   2242       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
   2243       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
   2244       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
   2245             DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
   2246       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
   2247     }
   2248   }
   2249 
   2250   return SDValue();
   2251 }
   2252 
   2253 SDValue DAGCombiner::visitMULHU(SDNode *N) {
   2254   SDValue N0 = N->getOperand(0);
   2255   SDValue N1 = N->getOperand(1);
   2256   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   2257   EVT VT = N->getValueType(0);
   2258   SDLoc DL(N);
   2259 
   2260   // fold (mulhu x, 0) -> 0
   2261   if (N1C && N1C->isNullValue())
   2262     return N1;
   2263   // fold (mulhu x, 1) -> 0
   2264   if (N1C && N1C->getAPIntValue() == 1)
   2265     return DAG.getConstant(0, N0.getValueType());
   2266   // fold (mulhu x, undef) -> 0
   2267   if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
   2268     return DAG.getConstant(0, VT);
   2269 
   2270   // If the type twice as wide is legal, transform the mulhu to a wider multiply
   2271   // plus a shift.
   2272   if (VT.isSimple() && !VT.isVector()) {
   2273     MVT Simple = VT.getSimpleVT();
   2274     unsigned SimpleSize = Simple.getSizeInBits();
   2275     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
   2276     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
   2277       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
   2278       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
   2279       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
   2280       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
   2281             DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
   2282       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
   2283     }
   2284   }
   2285 
   2286   return SDValue();
   2287 }
   2288 
   2289 /// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
   2290 /// compute two values. LoOp and HiOp give the opcodes for the two computations
   2291 /// that are being performed. Return the combined value if a simplification
   2292 /// was made, or a null SDValue otherwise.
   2293 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
   2294                                                 unsigned HiOp) {
   2295   // If the high half is not needed, just compute the low half.
   2296   bool HiExists = N->hasAnyUseOfValue(1);
   2297   if (!HiExists &&
   2298       (!LegalOperations ||
   2299        TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
   2300     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
   2301                               ArrayRef<SDUse>(N->op_begin(), N->op_end()));
   2302     return CombineTo(N, Res, Res);
   2303   }
   2304 
   2305   // If the low half is not needed, just compute the high half.
   2306   bool LoExists = N->hasAnyUseOfValue(0);
   2307   if (!LoExists &&
   2308       (!LegalOperations ||
   2309        TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
   2310     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1),
   2311                               ArrayRef<SDUse>(N->op_begin(), N->op_end()));
   2312     return CombineTo(N, Res, Res);
   2313   }
   2314 
   2315   // If both halves are used, return as it is.
   2316   if (LoExists && HiExists)
   2317     return SDValue();
   2318 
   2319   // If the two computed results can be simplified separately, separate them.
   2320   if (LoExists) {
   2321     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
   2322                              ArrayRef<SDUse>(N->op_begin(), N->op_end()));
   2323     AddToWorkList(Lo.getNode());
   2324     SDValue LoOpt = combine(Lo.getNode());
   2325     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
   2326         (!LegalOperations ||
   2327          TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
   2328       return CombineTo(N, LoOpt, LoOpt);
   2329   }
   2330 
   2331   if (HiExists) {
   2332     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1),
   2333                              ArrayRef<SDUse>(N->op_begin(), N->op_end()));
   2334     AddToWorkList(Hi.getNode());
   2335     SDValue HiOpt = combine(Hi.getNode());
   2336     if (HiOpt.getNode() && HiOpt != Hi &&
   2337         (!LegalOperations ||
   2338          TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
   2339       return CombineTo(N, HiOpt, HiOpt);
   2340   }
   2341 
   2342   return SDValue();
   2343 }
   2344 
   2345 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
   2346   SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
   2347   if (Res.getNode()) return Res;
   2348 
   2349   EVT VT = N->getValueType(0);
   2350   SDLoc DL(N);
   2351 
   2352   // If the type twice as wide is legal, transform the mulhu to a wider multiply
   2353   // plus a shift.
   2354   if (VT.isSimple() && !VT.isVector()) {
   2355     MVT Simple = VT.getSimpleVT();
   2356     unsigned SimpleSize = Simple.getSizeInBits();
   2357     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
   2358     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
   2359       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
   2360       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
   2361       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
   2362       // Compute the high part (result value 1).
   2363       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
   2364             DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
   2365       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
   2366       // Compute the low part (result value 0).
   2367       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
   2368       return CombineTo(N, Lo, Hi);
   2369     }
   2370   }
   2371 
   2372   return SDValue();
   2373 }
   2374 
   2375 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
   2376   SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
   2377   if (Res.getNode()) return Res;
   2378 
   2379   EVT VT = N->getValueType(0);
   2380   SDLoc DL(N);
   2381 
   2382   // If the type twice as wide is legal, transform the mulhu to a wider multiply
   2383   // plus a shift.
   2384   if (VT.isSimple() && !VT.isVector()) {
   2385     MVT Simple = VT.getSimpleVT();
   2386     unsigned SimpleSize = Simple.getSizeInBits();
   2387     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
   2388     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
   2389       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
   2390       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
   2391       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
   2392       // Compute the high part (result value 1).
   2393       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
   2394             DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
   2395       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
   2396       // Compute the low part (result value 0).
   2397       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
   2398       return CombineTo(N, Lo, Hi);
   2399     }
   2400   }
   2401 
   2402   return SDValue();
   2403 }
   2404 
   2405 SDValue DAGCombiner::visitSMULO(SDNode *N) {
   2406   // (smulo x, 2) -> (saddo x, x)
   2407   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
   2408     if (C2->getAPIntValue() == 2)
   2409       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
   2410                          N->getOperand(0), N->getOperand(0));
   2411 
   2412   return SDValue();
   2413 }
   2414 
   2415 SDValue DAGCombiner::visitUMULO(SDNode *N) {
   2416   // (umulo x, 2) -> (uaddo x, x)
   2417   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
   2418     if (C2->getAPIntValue() == 2)
   2419       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
   2420                          N->getOperand(0), N->getOperand(0));
   2421 
   2422   return SDValue();
   2423 }
   2424 
   2425 SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
   2426   SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
   2427   if (Res.getNode()) return Res;
   2428 
   2429   return SDValue();
   2430 }
   2431 
   2432 SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
   2433   SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
   2434   if (Res.getNode()) return Res;
   2435 
   2436   return SDValue();
   2437 }
   2438 
   2439 /// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
   2440 /// two operands of the same opcode, try to simplify it.
   2441 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
   2442   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
   2443   EVT VT = N0.getValueType();
   2444   assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
   2445 
   2446   // Bail early if none of these transforms apply.
   2447   if (N0.getNode()->getNumOperands() == 0) return SDValue();
   2448 
   2449   // For each of OP in AND/OR/XOR:
   2450   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
   2451   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
   2452   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
   2453   // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
   2454   //
   2455   // do not sink logical op inside of a vector extend, since it may combine
   2456   // into a vsetcc.
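          // For example, (and (zext i8 x to i32), (zext i8 y to i32)) becomes
          // (zext (and x, y) to i32), performing the logic in the narrower type.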
   2457   EVT Op0VT = N0.getOperand(0).getValueType();
   2458   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
   2459        N0.getOpcode() == ISD::SIGN_EXTEND ||
   2460        // Avoid infinite looping with PromoteIntBinOp.
   2461        (N0.getOpcode() == ISD::ANY_EXTEND &&
   2462         (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
   2463        (N0.getOpcode() == ISD::TRUNCATE &&
   2464         (!TLI.isZExtFree(VT, Op0VT) ||
   2465          !TLI.isTruncateFree(Op0VT, VT)) &&
   2466         TLI.isTypeLegal(Op0VT))) &&
   2467       !VT.isVector() &&
   2468       Op0VT == N1.getOperand(0).getValueType() &&
   2469       (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
   2470     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
   2471                                  N0.getOperand(0).getValueType(),
   2472                                  N0.getOperand(0), N1.getOperand(0));
   2473     AddToWorkList(ORNode.getNode());
   2474     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
   2475   }
   2476 
   2477   // For each of OP in SHL/SRL/SRA/AND...
   2478   //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
   2479   //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
   2480   //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
   2481   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
   2482        N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
   2483       N0.getOperand(1) == N1.getOperand(1)) {
   2484     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
   2485                                  N0.getOperand(0).getValueType(),
   2486                                  N0.getOperand(0), N1.getOperand(0));
   2487     AddToWorkList(ORNode.getNode());
   2488     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
   2489                        ORNode, N0.getOperand(1));
   2490   }
   2491 
   2492   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
   2493   // Only perform this optimization after type legalization and before
   2494   // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
   2495   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
   2496   // we don't want to undo this promotion.
   2497   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
   2498   // on scalars.
   2499   if ((N0.getOpcode() == ISD::BITCAST ||
   2500        N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
   2501       Level == AfterLegalizeTypes) {
   2502     SDValue In0 = N0.getOperand(0);
   2503     SDValue In1 = N1.getOperand(0);
   2504     EVT In0Ty = In0.getValueType();
   2505     EVT In1Ty = In1.getValueType();
   2506     SDLoc DL(N);
   2507     // If both incoming values are integers, and the original types are the
   2508     // same.
   2509     if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
   2510       SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
   2511       SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
   2512       AddToWorkList(Op.getNode());
   2513       return BC;
   2514     }
   2515   }
   2516 
   2517   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
   2518   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
   2519   // If both shuffles use the same mask, and both shuffle within a single
   2520   // vector, then it is worthwhile to move the swizzle after the operation.
   2521   // The type-legalizer generates this pattern when loading illegal
   2522   // vector types from memory. In many cases this allows additional shuffle
   2523   // optimizations.
   2524   // There are other cases where moving the shuffle after the xor/and/or
   2525   // is profitable even if shuffles don't perform a swizzle.
   2526   // If both shuffles use the same mask, and both shuffles have the same first
   2527   // or second operand, then it might still be profitable to move the shuffle
   2528   // after the xor/and/or operation.
   2529   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
   2530     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
   2531     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
   2532 
   2533     assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
   2534            "Inputs to shuffles are not the same type");
   2535 
   2536     // Check that both shuffles use the same mask. The masks are known to be of
   2537     // the same length because the result vector type is the same.
   2538     // Check also that shuffles have only one use to avoid introducing extra
   2539     // instructions.
   2540     if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
   2541         SVN0->getMask().equals(SVN1->getMask())) {
   2542       SDValue ShOp = N0->getOperand(1);
   2543 
   2544       // Don't try to fold this node if it requires introducing a
   2545       // build vector of all zeros that might be illegal at this stage.
   2546       if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
   2547         if (!LegalTypes)
   2548           ShOp = DAG.getConstant(0, VT);
   2549         else
   2550           ShOp = SDValue();
   2551       }
   2552 
   2553       // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
   2554       // (AND (shuf A, C), (shuf B, C)) -> (shuf (AND A, B), C)
   2555       // (OR  (shuf A, C), (shuf B, C)) -> (shuf (OR  A, B), C)
   2556       // (XOR (shuf A, C), (shuf B, C)) -> (shuf (XOR A, B), V_0)
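              // (for XOR, the shared operand C cancels with itself, so the second
              //  shuffle operand becomes the all-zeros vector V_0)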
   2557         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
   2558                                       N0->getOperand(0), N1->getOperand(0));
   2559         AddToWorkList(NewNode.getNode());
   2560         return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
   2561                                     &SVN0->getMask()[0]);
   2562       }
   2563 
   2564       // Don't try to fold this node if it requires introducing a
   2565       // build vector of all zeros that might be illegal at this stage.
   2566       ShOp = N0->getOperand(0);
   2567       if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
   2568         if (!LegalTypes)
   2569           ShOp = DAG.getConstant(0, VT);
   2570         else
   2571           ShOp = SDValue();
   2572       }
   2573 
   2574       // (AND (shuf C, A), (shuf C, B)) -> (shuf C, (AND A, B))
   2575       // (OR  (shuf C, A), (shuf C, B)) -> (shuf C, (OR  A, B))
   2576       // (XOR (shuf C, A), (shuf C, B)) -> (shuf V_0, (XOR A, B))
   2577       if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
   2578         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
   2579                                       N0->getOperand(1), N1->getOperand(1));
   2580         AddToWorkList(NewNode.getNode());
   2581         return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
   2582                                     &SVN0->getMask()[0]);
   2583       }
   2584     }
   2585   }
   2586 
   2587   return SDValue();
   2588 }
   2589 
   2590 SDValue DAGCombiner::visitAND(SDNode *N) {
   2591   SDValue N0 = N->getOperand(0);
   2592   SDValue N1 = N->getOperand(1);
   2593   SDValue LL, LR, RL, RR, CC0, CC1;
   2594   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   2595   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   2596   EVT VT = N1.getValueType();
   2597   unsigned BitWidth = VT.getScalarType().getSizeInBits();
   2598 
   2599   // fold vector ops
   2600   if (VT.isVector()) {
   2601     SDValue FoldedVOp = SimplifyVBinOp(N);
   2602     if (FoldedVOp.getNode()) return FoldedVOp;
   2603 
   2604     // fold (and x, 0) -> 0, vector edition
   2605     if (ISD::isBuildVectorAllZeros(N0.getNode()))
   2606       return N0;
   2607     if (ISD::isBuildVectorAllZeros(N1.getNode()))
   2608       return N1;
   2609 
   2610     // fold (and x, -1) -> x, vector edition
   2611     if (ISD::isBuildVectorAllOnes(N0.getNode()))
   2612       return N1;
   2613     if (ISD::isBuildVectorAllOnes(N1.getNode()))
   2614       return N0;
   2615   }
   2616 
   2617   // fold (and x, undef) -> 0
   2618   if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
   2619     return DAG.getConstant(0, VT);
   2620   // fold (and c1, c2) -> c1&c2
   2621   if (N0C && N1C)
   2622     return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
   2623   // canonicalize constant to RHS
   2624   if (N0C && !N1C)
   2625     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
   2626   // fold (and x, -1) -> x
   2627   if (N1C && N1C->isAllOnesValue())
   2628     return N0;
   2629   // if (and x, c) is known to be zero, return 0
   2630   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
   2631                                    APInt::getAllOnesValue(BitWidth)))
   2632     return DAG.getConstant(0, VT);
   2633   // reassociate and
   2634   SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1);
   2635   if (RAND.getNode())
   2636     return RAND;
   2637   // fold (and (or x, C), D) -> D if (C & D) == D
   2638   if (N1C && N0.getOpcode() == ISD::OR)
   2639     if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
   2640       if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
   2641         return N1;
   2642   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
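          // For example, (and (any_extend i8 x to i32), 255) becomes
          // (zero_extend i8 x to i32): the mask clears only bits that the
          // zero extension clears anyway.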
   2643   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
   2644     SDValue N0Op0 = N0.getOperand(0);
   2645     APInt Mask = ~N1C->getAPIntValue();
   2646     Mask = Mask.trunc(N0Op0.getValueSizeInBits());
   2647     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
   2648       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
   2649                                  N0.getValueType(), N0Op0);
   2650 
   2651       // Replace uses of the AND with uses of the Zero extend node.
   2652       CombineTo(N, Zext);
   2653 
   2654       // We actually want to replace all uses of the any_extend with the
   2655       // zero_extend, to avoid duplicating things.  This will later cause this
   2656       // AND to be folded.
   2657       CombineTo(N0.getNode(), Zext);
   2658       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   2659     }
   2660   }
   2661   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
   2662   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
   2663   // already be zero by virtue of the width of the base type of the load.
   2664   //
   2665   // the 'X' node here can either be nothing or an extract_vector_elt to catch
   2666   // more cases.
   2667   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
   2668        N0.getOperand(0).getOpcode() == ISD::LOAD) ||
   2669       N0.getOpcode() == ISD::LOAD) {
   2670     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
   2671                                          N0 : N0.getOperand(0) );
   2672 
   2673     // Get the constant (if applicable) the zero'th operand is being ANDed with.
   2674     // This can be a pure constant or a vector splat, in which case we treat the
   2675     // vector as a scalar and use the splat value.
   2676     APInt Constant = APInt::getNullValue(1);
   2677     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
   2678       Constant = C->getAPIntValue();
   2679     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
   2680       APInt SplatValue, SplatUndef;
   2681       unsigned SplatBitSize;
   2682       bool HasAnyUndefs;
   2683       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
   2684                                              SplatBitSize, HasAnyUndefs);
   2685       if (IsSplat) {
   2686         // Undef bits can contribute to a possible optimisation if set, so
   2687         // set them.
   2688         SplatValue |= SplatUndef;
   2689 
   2690         // The splat value may be something like "0x00FFFFFF", which means 0 for
   2691         // the first vector value and FF for the rest, repeating. We need a mask
   2692         // that will apply equally to all members of the vector, so AND all the
   2693         // lanes of the constant together.
   2694         EVT VT = Vector->getValueType(0);
   2695         unsigned BitWidth = VT.getVectorElementType().getSizeInBits();
   2696 
   2697         // If the splat value has been compressed to a bitlength lower
   2698         // than the size of the vector lane, we need to re-expand it to
   2699         // the lane size.
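                // For example, an 8-bit splat value of 0xAB in 32-bit lanes is
                // re-expanded to 0xABABABAB by repeatedly ORing in shifted copies.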
   2700         if (BitWidth > SplatBitSize)
   2701           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
   2702                SplatBitSize < BitWidth;
   2703                SplatBitSize = SplatBitSize * 2)
   2704             SplatValue |= SplatValue.shl(SplatBitSize);
   2705 
   2706         Constant = APInt::getAllOnesValue(BitWidth);
   2707         for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
   2708           Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
   2709       }
   2710     }
   2711 
   2712     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
   2713     // actually legal and isn't going to get expanded, else this is a false
   2714     // optimisation.
   2715     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
   2716                                                     Load->getMemoryVT());
   2717 
   2718     // Resize the constant to the same size as the original memory access before
   2719     // extension. If it is still the AllOnesValue then this AND is completely
   2720     // unneeded.
   2721     Constant =
   2722       Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());
   2723 
   2724     bool B;
   2725     switch (Load->getExtensionType()) {
   2726     default: B = false; break;
   2727     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
   2728     case ISD::ZEXTLOAD:
   2729     case ISD::NON_EXTLOAD: B = true; break;
   2730     }
   2731 
   2732     if (B && Constant.isAllOnesValue()) {
   2733       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
   2734       // preserve semantics once we get rid of the AND.
   2735       SDValue NewLoad(Load, 0);
   2736       if (Load->getExtensionType() == ISD::EXTLOAD) {
   2737         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
   2738                               Load->getValueType(0), SDLoc(Load),
   2739                               Load->getChain(), Load->getBasePtr(),
   2740                               Load->getOffset(), Load->getMemoryVT(),
   2741                               Load->getMemOperand());
   2742         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
   2743         if (Load->getNumValues() == 3) {
   2744           // PRE/POST_INC loads have 3 values.
   2745           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
   2746                            NewLoad.getValue(2) };
   2747           CombineTo(Load, To, 3, true);
   2748         } else {
   2749           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
   2750         }
   2751       }
   2752 
   2753       // Fold the AND away, taking care not to fold to the old load node if we
   2754       // replaced it.
   2755       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
   2756 
   2757       return SDValue(N, 0); // Return N so it doesn't get rechecked!
   2758     }
   2759   }
   2760   // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
   2761   if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
   2762     ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
   2763     ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
   2764 
   2765     if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
   2766         LL.getValueType().isInteger()) {
   2767       // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
   2768       if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
   2769         SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
   2770                                      LR.getValueType(), LL, RL);
   2771         AddToWorkList(ORNode.getNode());
   2772         return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
   2773       }
   2774       // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
   2775       if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
   2776         SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
   2777                                       LR.getValueType(), LL, RL);
   2778         AddToWorkList(ANDNode.getNode());
   2779         return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
   2780       }
   2781       // fold (and (setgt X,  -1), (setgt Y,  -1)) -> (setgt (or X, Y), -1)
   2782       if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
   2783         SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
   2784                                      LR.getValueType(), LL, RL);
   2785         AddToWorkList(ORNode.getNode());
   2786         return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
   2787       }
   2788     }
   2789     // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
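            // (adding 1 maps X == 0 to 1 and X == -1 to 0, the only unsigned values
            // below 2, so the conjunction holds exactly when X + 1 >= 2 unsigned)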
   2790     if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
   2791         Op0 == Op1 && LL.getValueType().isInteger() &&
   2792       Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() &&
   2793                                  cast<ConstantSDNode>(RR)->isAllOnesValue()) ||
   2794                                 (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
   2795                                  cast<ConstantSDNode>(RR)->isNullValue()))) {
   2796       SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(),
   2797                                     LL, DAG.getConstant(1, LL.getValueType()));
   2798       AddToWorkList(ADDNode.getNode());
   2799       return DAG.getSetCC(SDLoc(N), VT, ADDNode,
   2800                           DAG.getConstant(2, LL.getValueType()), ISD::SETUGE);
   2801     }
   2802     // canonicalize equivalent to ll == rl
   2803     if (LL == RR && LR == RL) {
   2804       Op1 = ISD::getSetCCSwappedOperands(Op1);
   2805       std::swap(RL, RR);
   2806     }
   2807     if (LL == RL && LR == RR) {
   2808       bool isInteger = LL.getValueType().isInteger();
   2809       ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
   2810       if (Result != ISD::SETCC_INVALID &&
   2811           (!LegalOperations ||
   2812            (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
   2813             TLI.isOperationLegal(ISD::SETCC,
   2814                             getSetCCResultType(N0.getSimpleValueType())))))
   2815         return DAG.getSetCC(SDLoc(N), N0.getValueType(),
   2816                             LL, LR, Result);
   2817     }
   2818   }
   2819 
   2820   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
   2821   if (N0.getOpcode() == N1.getOpcode()) {
   2822     SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
   2823     if (Tmp.getNode()) return Tmp;
   2824   }
   2825 
   2826   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
   2827   // fold (and (sra)) -> (and (srl)) when possible.
   2828   if (!VT.isVector() &&
   2829       SimplifyDemandedBits(SDValue(N, 0)))
   2830     return SDValue(N, 0);
   2831 
   2832   // fold (zext_inreg (extload x)) -> (zextload x)
   2833   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
   2834     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   2835     EVT MemVT = LN0->getMemoryVT();
   2836     // If we zero all the possible extended bits, then we can turn this into
   2837     // a zextload if we are running before legalize or the operation is legal.
   2838     unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
   2839     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
   2840                            BitWidth - MemVT.getScalarType().getSizeInBits())) &&
   2841         ((!LegalOperations && !LN0->isVolatile()) ||
   2842          TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
   2843       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
   2844                                        LN0->getChain(), LN0->getBasePtr(),
   2845                                        MemVT, LN0->getMemOperand());
   2846       AddToWorkList(N);
   2847       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
   2848       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   2849     }
   2850   }
   2851   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
   2852   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
   2853       N0.hasOneUse()) {
   2854     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   2855     EVT MemVT = LN0->getMemoryVT();
   2856     // If we zero all the possible extended bits, then we can turn this into
   2857     // a zextload if we are running before legalize or the operation is legal.
   2858     unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
   2859     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
   2860                            BitWidth - MemVT.getScalarType().getSizeInBits())) &&
   2861         ((!LegalOperations && !LN0->isVolatile()) ||
   2862          TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
   2863       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
   2864                                        LN0->getChain(), LN0->getBasePtr(),
   2865                                        MemVT, LN0->getMemOperand());
   2866       AddToWorkList(N);
   2867       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
   2868       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   2869     }
   2870   }
   2871 
   2872   // fold (and (load x), 255) -> (zextload x, i8)
   2873   // fold (and (extload x, i16), 255) -> (zextload x, i8)
   2874   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
   2875   if (N1C && (N0.getOpcode() == ISD::LOAD ||
   2876               (N0.getOpcode() == ISD::ANY_EXTEND &&
   2877                N0.getOperand(0).getOpcode() == ISD::LOAD))) {
   2878     bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
   2879     LoadSDNode *LN0 = HasAnyExt
   2880       ? cast<LoadSDNode>(N0.getOperand(0))
   2881       : cast<LoadSDNode>(N0);
   2882     if (LN0->getExtensionType() != ISD::SEXTLOAD &&
   2883         LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
   2884       uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
   2885       if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
   2886         EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
   2887         EVT LoadedVT = LN0->getMemoryVT();
   2888 
   2889         if (ExtVT == LoadedVT &&
   2890             (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
   2891           EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
   2892 
   2893           SDValue NewLoad =
   2894             DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
   2895                            LN0->getChain(), LN0->getBasePtr(), ExtVT,
   2896                            LN0->getMemOperand());
   2897           AddToWorkList(N);
   2898           CombineTo(LN0, NewLoad, NewLoad.getValue(1));
   2899           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   2900         }
   2901 
   2902         // Do not change the width of a volatile load.
   2903         // Do not generate loads of non-round integer types since these can
   2904         // be expensive (and would be wrong if the type is not byte sized).
   2905         if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
   2906             (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
   2907           EVT PtrType = LN0->getOperand(1).getValueType();
   2908 
   2909           unsigned Alignment = LN0->getAlignment();
   2910           SDValue NewPtr = LN0->getBasePtr();
   2911 
   2912           // For big endian targets, we need to add an offset to the pointer
   2913           // to load the correct bytes.  For little endian systems, we merely
   2914           // need to read fewer bytes from the same pointer.
   2915           if (TLI.isBigEndian()) {
   2916             unsigned LVTStoreBytes = LoadedVT.getStoreSize();
   2917             unsigned EVTStoreBytes = ExtVT.getStoreSize();
   2918             unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
   2919             NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType,
   2920                                  NewPtr, DAG.getConstant(PtrOff, PtrType));
   2921             Alignment = MinAlign(Alignment, PtrOff);
   2922           }
   2923 
   2924           AddToWorkList(NewPtr.getNode());
   2925 
   2926           EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
   2927           SDValue Load =
   2928             DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
   2929                            LN0->getChain(), NewPtr,
   2930                            LN0->getPointerInfo(),
   2931                            ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
   2932                            Alignment, LN0->getTBAAInfo());
   2933           AddToWorkList(N);
   2934           CombineTo(LN0, Load, Load.getValue(1));
   2935           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   2936         }
   2937       }
   2938     }
   2939   }
   2940 
   2941   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
   2942       VT.getSizeInBits() <= 64) {
   2943     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
   2944       APInt ADDC = ADDI->getAPIntValue();
   2945       if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
   2946         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
   2947         // immediate for an add, but it is legal if its top c2 bits are set,
   2948         // transform the ADD so the immediate doesn't need to be materialized
   2949         // in a register.
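                // For example, with c2 == 16 on an i32 value the AND discards the
                // top 16 bits of the add, so when the top 16 bits of c1 are already
                // zero, c1 may be replaced by c1 | 0xFFFF0000 if that is a legal
                // add immediate.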
   2950         if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
   2951           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
   2952                                              SRLI->getZExtValue());
   2953           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
   2954             ADDC |= Mask;
   2955             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
   2956               SDValue NewAdd =
   2957                 DAG.getNode(ISD::ADD, SDLoc(N0), VT,
   2958                             N0.getOperand(0), DAG.getConstant(ADDC, VT));
   2959               CombineTo(N0.getNode(), NewAdd);
   2960               return SDValue(N, 0); // Return N so it doesn't get rechecked!
   2961             }
   2962           }
   2963         }
   2964       }
   2965     }
   2966   }
   2967 
   2968   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
   2969   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
   2970     SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
   2971                                        N0.getOperand(1), false);
   2972     if (BSwap.getNode())
   2973       return BSwap;
   2974   }
   2975 
   2976   return SDValue();
   2977 }
   2978 
   2979 /// MatchBSwapHWordLow - Match (a >> 8) | (a << 8) as (bswap a) >> 16
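        /// For types wider than i16, the bits above the low halfword must either
        /// be masked out in the pattern or not be demanded, since the replacement
        /// value has zeros in those bits.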
   2980 ///
   2981 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
   2982                                         bool DemandHighBits) {
   2983   if (!LegalOperations)
   2984     return SDValue();
   2985 
   2986   EVT VT = N->getValueType(0);
   2987   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
   2988     return SDValue();
   2989   if (!TLI.isOperationLegal(ISD::BSWAP, VT))
   2990     return SDValue();
   2991 
   2992   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
   2993   bool LookPassAnd0 = false;
   2994   bool LookPassAnd1 = false;
   2995   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
   2996       std::swap(N0, N1);
   2997   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
   2998       std::swap(N0, N1);
   2999   if (N0.getOpcode() == ISD::AND) {
   3000     if (!N0.getNode()->hasOneUse())
   3001       return SDValue();
   3002     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   3003     if (!N01C || N01C->getZExtValue() != 0xFF00)
   3004       return SDValue();
   3005     N0 = N0.getOperand(0);
   3006     LookPassAnd0 = true;
   3007   }
   3008 
   3009   if (N1.getOpcode() == ISD::AND) {
   3010     if (!N1.getNode()->hasOneUse())
   3011       return SDValue();
   3012     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
   3013     if (!N11C || N11C->getZExtValue() != 0xFF)
   3014       return SDValue();
   3015     N1 = N1.getOperand(0);
   3016     LookPassAnd1 = true;
   3017   }
   3018 
   3019   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
   3020     std::swap(N0, N1);
   3021   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
   3022     return SDValue();
   3023   if (!N0.getNode()->hasOneUse() ||
   3024       !N1.getNode()->hasOneUse())
   3025     return SDValue();
   3026 
   3027   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   3028   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
   3029   if (!N01C || !N11C)
   3030     return SDValue();
   3031   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
   3032     return SDValue();
   3033 
   3034   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
   3035   SDValue N00 = N0->getOperand(0);
   3036   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
   3037     if (!N00.getNode()->hasOneUse())
   3038       return SDValue();
   3039     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
   3040     if (!N001C || N001C->getZExtValue() != 0xFF)
   3041       return SDValue();
   3042     N00 = N00.getOperand(0);
   3043     LookPassAnd0 = true;
   3044   }
   3045 
   3046   SDValue N10 = N1->getOperand(0);
   3047   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
   3048     if (!N10.getNode()->hasOneUse())
   3049       return SDValue();
   3050     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
   3051     if (!N101C || N101C->getZExtValue() != 0xFF00)
   3052       return SDValue();
   3053     N10 = N10.getOperand(0);
   3054     LookPassAnd1 = true;
   3055   }
   3056 
   3057   if (N00 != N10)
   3058     return SDValue();
   3059 
   3060   // Make sure everything beyond the low halfword gets set to zero since the SRL
   3061   // 16 will clear the top bits.
   3062   unsigned OpSizeInBits = VT.getSizeInBits();
   3063   if (DemandHighBits && OpSizeInBits > 16) {
   3064     // If the left-shift isn't masked out then the only way this is a bswap is
   3065     // if all bits beyond the low 8 are 0. In that case the entire pattern
   3066     // reduces to a left shift anyway: leave it for other parts of the combiner.
   3067     if (!LookPassAnd0)
   3068       return SDValue();
   3069 
   3070     // However, if the right shift isn't masked out then it might be because
   3071     // it's not needed. See if we can spot that too.
   3072     if (!LookPassAnd1 &&
   3073         !DAG.MaskedValueIsZero(
   3074             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
   3075       return SDValue();
   3076   }
   3077 
   3078   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
   3079   if (OpSizeInBits > 16)
   3080     Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res,
   3081                       DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
   3082   return Res;
   3083 }
   3084 
   3085 /// isBSwapHWordElement - Return true if the specified node is an element
   3086 /// that makes up a 32-bit packed halfword byteswap. i.e.
   3087 /// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
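        /// On a successful match the source value is recorded in Parts[Num], where
        /// Num is selected by the byte mask involved (0xff -> 0, 0xff00 -> 1, ...);
        /// the caller then checks that all four parts come from the same node.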
   3088 static bool isBSwapHWordElement(SDValue N, SmallVectorImpl<SDNode *> &Parts) {
   3089   if (!N.getNode()->hasOneUse())
   3090     return false;
   3091 
   3092   unsigned Opc = N.getOpcode();
   3093   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
   3094     return false;
   3095 
   3096   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
   3097   if (!N1C)
   3098     return false;
   3099 
   3100   unsigned Num;
   3101   switch (N1C->getZExtValue()) {
   3102   default:
   3103     return false;
   3104   case 0xFF:       Num = 0; break;
   3105   case 0xFF00:     Num = 1; break;
   3106   case 0xFF0000:   Num = 2; break;
   3107   case 0xFF000000: Num = 3; break;
   3108   }
   3109 
   3110   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
   3111   SDValue N0 = N.getOperand(0);
   3112   if (Opc == ISD::AND) {
   3113     if (Num == 0 || Num == 2) {
   3114       // (x >> 8) & 0xff
   3115       // (x >> 8) & 0xff0000
   3116       if (N0.getOpcode() != ISD::SRL)
   3117         return false;
   3118       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   3119       if (!C || C->getZExtValue() != 8)
   3120         return false;
   3121     } else {
   3122       // (x << 8) & 0xff00
   3123       // (x << 8) & 0xff000000
   3124       if (N0.getOpcode() != ISD::SHL)
   3125         return false;
   3126       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   3127       if (!C || C->getZExtValue() != 8)
   3128         return false;
   3129     }
   3130   } else if (Opc == ISD::SHL) {
   3131     // (x & 0xff) << 8
   3132     // (x & 0xff0000) << 8
   3133     if (Num != 0 && Num != 2)
   3134       return false;
   3135     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
   3136     if (!C || C->getZExtValue() != 8)
   3137       return false;
   3138   } else { // Opc == ISD::SRL
   3139     // (x & 0xff00) >> 8
   3140     // (x & 0xff000000) >> 8
   3141     if (Num != 1 && Num != 3)
   3142       return false;
   3143     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
   3144     if (!C || C->getZExtValue() != 8)
   3145       return false;
   3146   }
   3147 
   3148   if (Parts[Num])
   3149     return false;
   3150 
   3151   Parts[Num] = N0.getOperand(0).getNode();
   3152   return true;
   3153 }
   3154 
   3155 /// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is
   3156 /// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
   3157 /// => (rotl (bswap x), 16)
   3158 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
   3159   if (!LegalOperations)
   3160     return SDValue();
   3161 
   3162   EVT VT = N->getValueType(0);
   3163   if (VT != MVT::i32)
   3164     return SDValue();
   3165   if (!TLI.isOperationLegal(ISD::BSWAP, VT))
   3166     return SDValue();
   3167 
   3168   SmallVector<SDNode*,4> Parts(4, (SDNode*)nullptr);
   3169   // Look for either
   3170   // (or (or (and), (and)), (or (and), (and)))
   3171   // (or (or (or (and), (and)), (and)), (and))
   3172   if (N0.getOpcode() != ISD::OR)
   3173     return SDValue();
   3174   SDValue N00 = N0.getOperand(0);
   3175   SDValue N01 = N0.getOperand(1);
   3176 
   3177   if (N1.getOpcode() == ISD::OR &&
   3178       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
   3179     // (or (or (and), (and)), (or (and), (and)))
   3180     SDValue N000 = N00.getOperand(0);
   3181     if (!isBSwapHWordElement(N000, Parts))
   3182       return SDValue();
   3183 
   3184     SDValue N001 = N00.getOperand(1);
   3185     if (!isBSwapHWordElement(N001, Parts))
   3186       return SDValue();
   3187     SDValue N010 = N01.getOperand(0);
   3188     if (!isBSwapHWordElement(N010, Parts))
   3189       return SDValue();
   3190     SDValue N011 = N01.getOperand(1);
   3191     if (!isBSwapHWordElement(N011, Parts))
   3192       return SDValue();
   3193   } else {
   3194     // (or (or (or (and), (and)), (and)), (and))
   3195     if (!isBSwapHWordElement(N1, Parts))
   3196       return SDValue();
   3197     if (!isBSwapHWordElement(N01, Parts))
   3198       return SDValue();
   3199     if (N00.getOpcode() != ISD::OR)
   3200       return SDValue();
   3201     SDValue N000 = N00.getOperand(0);
   3202     if (!isBSwapHWordElement(N000, Parts))
   3203       return SDValue();
   3204     SDValue N001 = N00.getOperand(1);
   3205     if (!isBSwapHWordElement(N001, Parts))
   3206       return SDValue();
   3207   }
   3208 
   3209   // Make sure the parts are all coming from the same node.
   3210   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
   3211     return SDValue();
   3212 
   3213   SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT,
   3214                               SDValue(Parts[0],0));
   3215 
   3216   // Result of the bswap should be rotated by 16. If it's not legal, then
   3217   // do  (x << 16) | (x >> 16).
   3218   SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
   3219   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
   3220     return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt);
   3221   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
   3222     return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt);
   3223   return DAG.getNode(ISD::OR, SDLoc(N), VT,
   3224                      DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt),
   3225                      DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt));
   3226 }
   3227 
   3228 SDValue DAGCombiner::visitOR(SDNode *N) {
   3229   SDValue N0 = N->getOperand(0);
   3230   SDValue N1 = N->getOperand(1);
   3231   SDValue LL, LR, RL, RR, CC0, CC1;
   3232   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   3233   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   3234   EVT VT = N1.getValueType();
   3235 
   3236   // fold vector ops
   3237   if (VT.isVector()) {
   3238     SDValue FoldedVOp = SimplifyVBinOp(N);
   3239     if (FoldedVOp.getNode()) return FoldedVOp;
   3240 
   3241     // fold (or x, 0) -> x, vector edition
   3242     if (ISD::isBuildVectorAllZeros(N0.getNode()))
   3243       return N1;
   3244     if (ISD::isBuildVectorAllZeros(N1.getNode()))
   3245       return N0;
   3246 
   3247     // fold (or x, -1) -> -1, vector edition
   3248     if (ISD::isBuildVectorAllOnes(N0.getNode()))
   3249       return N0;
   3250     if (ISD::isBuildVectorAllOnes(N1.getNode()))
   3251       return N1;
   3252 
   3253     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
   3254     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
   3255     // Do this only if the resulting shuffle is legal.
   3256     if (isa<ShuffleVectorSDNode>(N0) &&
   3257         isa<ShuffleVectorSDNode>(N1) &&
   3258         N0->getOperand(1) == N1->getOperand(1) &&
   3259         ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) {
   3260       bool CanFold = true;
   3261       unsigned NumElts = VT.getVectorNumElements();
   3262       const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
   3263       const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
   3264       // We construct two shuffle masks:
   3265       // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand
   3266       // and N1 as the second operand.
   3267       // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand
   3268       // and N0 as the second operand.
   3269       // We do this because OR is commutable and therefore there might be
   3270       // two ways to fold this node into a shuffle.
   3271       SmallVector<int,4> Mask1;
   3272       SmallVector<int,4> Mask2;
   3273 
   3274       for (unsigned i = 0; i != NumElts && CanFold; ++i) {
   3275         int M0 = SV0->getMaskElt(i);
   3276         int M1 = SV1->getMaskElt(i);
   3277 
   3278         // Both shuffle indexes are undef. Propagate Undef.
   3279         if (M0 < 0 && M1 < 0) {
   3280           Mask1.push_back(M0);
   3281           Mask2.push_back(M0);
   3282           continue;
   3283         }
   3284 
   3285         if (M0 < 0 || M1 < 0 ||
   3286             (M0 < (int)NumElts && M1 < (int)NumElts) ||
   3287             (M0 >= (int)NumElts && M1 >= (int)NumElts)) {
   3288           CanFold = false;
   3289           break;
   3290         }
   3291 
   3292         Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts);
   3293         Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts);
   3294       }
   3295 
   3296       if (CanFold) {
   3297         // Fold this sequence only if the resulting shuffle is 'legal'.
   3298         if (TLI.isShuffleMaskLegal(Mask1, VT))
   3299           return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0),
   3300                                       N1->getOperand(0), &Mask1[0]);
   3301         if (TLI.isShuffleMaskLegal(Mask2, VT))
   3302           return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0),
   3303                                       N0->getOperand(0), &Mask2[0]);
   3304       }
   3305     }
   3306   }
   3307 
   3308   // fold (or x, undef) -> -1
   3309   if (!LegalOperations &&
   3310       (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
   3311     EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
   3312     return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
   3313   }
   3314   // fold (or c1, c2) -> c1|c2
   3315   if (N0C && N1C)
   3316     return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
   3317   // canonicalize constant to RHS
   3318   if (N0C && !N1C)
   3319     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
   3320   // fold (or x, 0) -> x
   3321   if (N1C && N1C->isNullValue())
   3322     return N0;
   3323   // fold (or x, -1) -> -1
   3324   if (N1C && N1C->isAllOnesValue())
   3325     return N1;
   3326   // fold (or x, c) -> c iff (x & ~c) == 0
   3327   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
   3328     return N1;
   3329 
   3330   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
   3331   SDValue BSwap = MatchBSwapHWord(N, N0, N1);
   3332   if (BSwap.getNode())
   3333     return BSwap;
   3334   BSwap = MatchBSwapHWordLow(N, N0, N1);
   3335   if (BSwap.getNode())
   3336     return BSwap;
   3337 
   3338   // reassociate or
   3339   SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1);
   3340   if (ROR.getNode())
   3341     return ROR;
   3342   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
   3343   // iff (c1 & c2) != 0.
   3344   if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
   3345              isa<ConstantSDNode>(N0.getOperand(1))) {
   3346     ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
   3347     if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
   3348       SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1);
   3349       if (!COR.getNode())
   3350         return SDValue();
   3351       return DAG.getNode(ISD::AND, SDLoc(N), VT,
   3352                          DAG.getNode(ISD::OR, SDLoc(N0), VT,
   3353                                      N0.getOperand(0), N1), COR);
   3354     }
   3355   }
   3356   // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
   3357   if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
   3358     ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
   3359     ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
   3360 
   3361     if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
   3362         LL.getValueType().isInteger()) {
   3363       // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
   3364       // fold (or (setlt X, 0), (setlt Y, 0)) -> (setlt (or X, Y), 0)
   3365       if (cast<ConstantSDNode>(LR)->isNullValue() &&
   3366           (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
   3367         SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
   3368                                      LR.getValueType(), LL, RL);
   3369         AddToWorkList(ORNode.getNode());
   3370         return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
   3371       }
   3372       // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
   3373       // fold (or (setgt X, -1), (setgt Y, -1)) -> (setgt (and X, Y), -1)
   3374       if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
   3375           (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
   3376         SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
   3377                                       LR.getValueType(), LL, RL);
   3378         AddToWorkList(ANDNode.getNode());
   3379         return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
   3380       }
   3381     }
   3382     // canonicalize equivalent to ll == rl
   3383     if (LL == RR && LR == RL) {
   3384       Op1 = ISD::getSetCCSwappedOperands(Op1);
   3385       std::swap(RL, RR);
   3386     }
   3387     if (LL == RL && LR == RR) {
   3388       bool isInteger = LL.getValueType().isInteger();
   3389       ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
   3390       if (Result != ISD::SETCC_INVALID &&
   3391           (!LegalOperations ||
   3392            (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
   3393             TLI.isOperationLegal(ISD::SETCC,
   3394               getSetCCResultType(N0.getValueType())))))
   3395         return DAG.getSetCC(SDLoc(N), N0.getValueType(),
   3396                             LL, LR, Result);
   3397     }
   3398   }
   3399 
   3400   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
   3401   if (N0.getOpcode() == N1.getOpcode()) {
   3402     SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
   3403     if (Tmp.getNode()) return Tmp;
   3404   }
   3405 
   3406   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
   3407   if (N0.getOpcode() == ISD::AND &&
   3408       N1.getOpcode() == ISD::AND &&
   3409       N0.getOperand(1).getOpcode() == ISD::Constant &&
   3410       N1.getOperand(1).getOpcode() == ISD::Constant &&
   3411       // Don't increase # computations.
   3412       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
   3413     // We can only do this xform if we know that bits from X that are set in C2
   3414     // but not in C1 are already zero.  Likewise for Y.
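            // For example, on i16: (or (and X, 0xff00), (and Y, 0x00ff)) becomes
            // (and (or X, Y), 0xffff) when X's low byte and Y's high byte are known
            // to be zero.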
   3415     const APInt &LHSMask =
   3416       cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
   3417     const APInt &RHSMask =
   3418       cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
   3419 
   3420     if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
   3421         DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
   3422       SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
   3423                               N0.getOperand(0), N1.getOperand(0));
   3424       return DAG.getNode(ISD::AND, SDLoc(N), VT, X,
   3425                          DAG.getConstant(LHSMask | RHSMask, VT));
   3426     }
   3427   }
   3428 
   3429   // See if this is some rotate idiom.
   3430   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
   3431     return SDValue(Rot, 0);
   3432 
   3433   // Simplify the operands using demanded-bits information.
   3434   if (!VT.isVector() &&
   3435       SimplifyDemandedBits(SDValue(N, 0)))
   3436     return SDValue(N, 0);
   3437 
   3438   return SDValue();
   3439 }
   3440 
   3441 /// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
   3442 static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
   3443   if (Op.getOpcode() == ISD::AND) {
   3444     if (isa<ConstantSDNode>(Op.getOperand(1))) {
   3445       Mask = Op.getOperand(1);
   3446       Op = Op.getOperand(0);
   3447     } else {
   3448       return false;
   3449     }
   3450   }
   3451 
   3452   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
   3453     Shift = Op;
   3454     return true;
   3455   }
   3456 
   3457   return false;
   3458 }
   3459 
   3460 // Return true if we can prove that, whenever Neg and Pos are both in the
   3461 // range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos).  This means that
   3462 // for two opposing shifts shift1 and shift2 and a value X with OpSize bits:
   3463 //
   3464 //     (or (shift1 X, Neg), (shift2 X, Pos))
   3465 //
   3466 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
   3467 // in direction shift1 by Neg.  The range [0, OpSize) means that we only need
   3468 // to consider shift amounts with defined behavior.
   3469 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) {
   3470   // If OpSize is a power of 2 then:
   3471   //
   3472   //  (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1)
   3473   //  (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize).
   3474   //
   3475   // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check
   3476   // for the stronger condition:
   3477   //
   3478   //     Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1)    [A]
   3479   //
   3480   // for all Neg and Pos.  Since Neg & (OpSize - 1) == Neg' & (OpSize - 1)
   3481   // we can just replace Neg with Neg' for the rest of the function.
   3482   //
   3483   // In other cases we check for the even stronger condition:
   3484   //
   3485   //     Neg == OpSize - Pos                                    [B]
   3486   //
   3487   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
   3488   // behavior if Pos == 0 (and consequently Neg == OpSize).
   3489   //
   3490   // We could actually use [A] whenever OpSize is a power of 2, but the
   3491   // only extra cases that it would match are those uninteresting ones
   3492   // where Neg and Pos are never in range at the same time.  E.g. for
   3493   // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
   3494   // as well as (sub 32, Pos), but:
   3495   //
   3496   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
   3497   //
   3498   // always invokes undefined behavior for 32-bit X.
   3499   //
   3500   // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise.
   3501   unsigned MaskLoBits = 0;
   3502   if (Neg.getOpcode() == ISD::AND &&
   3503       isPowerOf2_64(OpSize) &&
   3504       Neg.getOperand(1).getOpcode() == ISD::Constant &&
   3505       cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) {
   3506     Neg = Neg.getOperand(0);
   3507     MaskLoBits = Log2_64(OpSize);
   3508   }
   3509 
   3510   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
   3511   if (Neg.getOpcode() != ISD::SUB)
   3512     return false;
   3513   ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0));
   3514   if (!NegC)
   3515     return false;
   3516   SDValue NegOp1 = Neg.getOperand(1);
   3517 
   3518   // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with
   3519   // Pos'.  The truncation is redundant for the purpose of the equality.
   3520   if (MaskLoBits &&
   3521       Pos.getOpcode() == ISD::AND &&
   3522       Pos.getOperand(1).getOpcode() == ISD::Constant &&
   3523       cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1)
   3524     Pos = Pos.getOperand(0);
   3525 
   3526   // The condition we need is now:
   3527   //
   3528   //     (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask
   3529   //
   3530   // If NegOp1 == Pos then we need:
   3531   //
   3532   //              OpSize & Mask == NegC & Mask
   3533   //
   3534   // (because "x & Mask" is a truncation and distributes through subtraction).
   3535   APInt Width;
   3536   if (Pos == NegOp1)
   3537     Width = NegC->getAPIntValue();
   3538   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
   3539   // Then the condition we want to prove becomes:
   3540   //
   3541   //     (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask
   3542   //
   3543   // which, again because "x & Mask" is a truncation, becomes:
   3544   //
   3545   //                NegC & Mask == (OpSize - PosC) & Mask
   3546   //              OpSize & Mask == (NegC + PosC) & Mask
   3547   else if (Pos.getOpcode() == ISD::ADD &&
   3548            Pos.getOperand(0) == NegOp1 &&
   3549            Pos.getOperand(1).getOpcode() == ISD::Constant)
   3550     Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() +
   3551              NegC->getAPIntValue());
   3552   else
   3553     return false;
   3554 
   3555   // Now we just need to check that OpSize & Mask == Width & Mask.
   3556   if (MaskLoBits)
   3557     // OpSize & Mask is 0 since Mask is OpSize - 1.
   3558     return Width.getLoBits(MaskLoBits) == 0;
   3559   return Width == OpSize;
   3560 }
   3561 
   3562 // A subroutine of MatchRotate used once we have found an OR of two opposite
   3563 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
   3564 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
   3565 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
   3566 // Neg with outer conversions stripped away.
   3567 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
   3568                                        SDValue Neg, SDValue InnerPos,
   3569                                        SDValue InnerNeg, unsigned PosOpcode,
   3570                                        unsigned NegOpcode, SDLoc DL) {
   3571   // fold (or (shl x, (*ext y)),
   3572   //          (srl x, (*ext (sub 32, y)))) ->
   3573   //   (rotl x, y) or (rotr x, (sub 32, y))
   3574   //
   3575   // fold (or (shl x, (*ext (sub 32, y))),
   3576   //          (srl x, (*ext y))) ->
   3577   //   (rotr x, y) or (rotl x, (sub 32, y))
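          // (here 32 stands for the bit width of the shifted value; the general
          //  case is checked by matchRotateSub below)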
   3578   EVT VT = Shifted.getValueType();
   3579   if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) {
   3580     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
   3581     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
   3582                        HasPos ? Pos : Neg).getNode();
   3583   }
   3584 
   3585   return nullptr;
   3586 }
   3587 
   3588 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
   3589 // idioms for rotate, and if the target supports rotation instructions, generate
   3590 // a rot[lr].
   3591 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
   3592   // Must be a legal type. Expanded and promoted types won't work with rotates.
   3593   EVT VT = LHS.getValueType();
   3594   if (!TLI.isTypeLegal(VT)) return nullptr;
   3595 
   3596   // The target must have at least one rotate flavor.
   3597   bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
   3598   bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
   3599   if (!HasROTL && !HasROTR) return nullptr;
   3600 
   3601   // Match "(X shl/srl V1) & V2" where V2 may not be present.
   3602   SDValue LHSShift;   // The shift.
   3603   SDValue LHSMask;    // AND value if any.
   3604   if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
   3605     return nullptr; // Not part of a rotate.
   3606 
   3607   SDValue RHSShift;   // The shift.
   3608   SDValue RHSMask;    // AND value if any.
   3609   if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
   3610     return nullptr; // Not part of a rotate.
   3611 
   3612   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
   3613     return nullptr;   // Not shifting the same value.
   3614 
   3615   if (LHSShift.getOpcode() == RHSShift.getOpcode())
   3616     return nullptr;   // Shifts must disagree.
   3617 
   3618   // Canonicalize shl to left side in a shl/srl pair.
   3619   if (RHSShift.getOpcode() == ISD::SHL) {
   3620     std::swap(LHS, RHS);
   3621     std::swap(LHSShift, RHSShift);
   3622     std::swap(LHSMask , RHSMask );
   3623   }
   3624 
   3625   unsigned OpSizeInBits = VT.getSizeInBits();
   3626   SDValue LHSShiftArg = LHSShift.getOperand(0);
   3627   SDValue LHSShiftAmt = LHSShift.getOperand(1);
   3628   SDValue RHSShiftArg = RHSShift.getOperand(0);
   3629   SDValue RHSShiftAmt = RHSShift.getOperand(1);
   3630 
   3631   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
   3632   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
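          // For example, on i32: (or (shl x, 8), (srl x, 24)) -> (rotl x, 8)
          // or, equivalently, (rotr x, 24).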
   3633   if (LHSShiftAmt.getOpcode() == ISD::Constant &&
   3634       RHSShiftAmt.getOpcode() == ISD::Constant) {
   3635     uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
   3636     uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
   3637     if ((LShVal + RShVal) != OpSizeInBits)
   3638       return nullptr;
   3639 
   3640     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
   3641                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
   3642 
   3643     // If there is an AND of either shifted operand, apply it to the result.
   3644     if (LHSMask.getNode() || RHSMask.getNode()) {
   3645       APInt Mask = APInt::getAllOnesValue(OpSizeInBits);
   3646 
   3647       if (LHSMask.getNode()) {
   3648         APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
   3649         Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
   3650       }
   3651       if (RHSMask.getNode()) {
   3652         APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
   3653         Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
   3654       }
   3655 
   3656       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
   3657     }
   3658 
   3659     return Rot.getNode();
   3660   }
   3661 
   3662   // If there is a mask here, and we have a variable shift, we can't be sure
   3663   // that we're masking out the right stuff.
   3664   if (LHSMask.getNode() || RHSMask.getNode())
   3665     return nullptr;
   3666 
    3667   // If the shift amount is sign/zero/any-extended or truncated, just peel that off.
   3668   SDValue LExtOp0 = LHSShiftAmt;
   3669   SDValue RExtOp0 = RHSShiftAmt;
   3670   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
   3671        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
   3672        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
   3673        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
   3674       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
   3675        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
   3676        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
   3677        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
   3678     LExtOp0 = LHSShiftAmt.getOperand(0);
   3679     RExtOp0 = RHSShiftAmt.getOperand(0);
   3680   }
   3681 
   3682   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
   3683                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
   3684   if (TryL)
   3685     return TryL;
   3686 
   3687   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
   3688                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
   3689   if (TryR)
   3690     return TryR;
   3691 
   3692   return nullptr;
   3693 }
   3694 
   3695 SDValue DAGCombiner::visitXOR(SDNode *N) {
   3696   SDValue N0 = N->getOperand(0);
   3697   SDValue N1 = N->getOperand(1);
   3698   SDValue LHS, RHS, CC;
   3699   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   3700   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   3701   EVT VT = N0.getValueType();
   3702 
   3703   // fold vector ops
   3704   if (VT.isVector()) {
   3705     SDValue FoldedVOp = SimplifyVBinOp(N);
   3706     if (FoldedVOp.getNode()) return FoldedVOp;
   3707 
   3708     // fold (xor x, 0) -> x, vector edition
   3709     if (ISD::isBuildVectorAllZeros(N0.getNode()))
   3710       return N1;
   3711     if (ISD::isBuildVectorAllZeros(N1.getNode()))
   3712       return N0;
   3713   }
   3714 
   3715   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
   3716   if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
   3717     return DAG.getConstant(0, VT);
   3718   // fold (xor x, undef) -> undef
   3719   if (N0.getOpcode() == ISD::UNDEF)
   3720     return N0;
   3721   if (N1.getOpcode() == ISD::UNDEF)
   3722     return N1;
   3723   // fold (xor c1, c2) -> c1^c2
   3724   if (N0C && N1C)
   3725     return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
   3726   // canonicalize constant to RHS
   3727   if (N0C && !N1C)
   3728     return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
   3729   // fold (xor x, 0) -> x
   3730   if (N1C && N1C->isNullValue())
   3731     return N0;
   3732   // reassociate xor
   3733   SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1);
   3734   if (RXOR.getNode())
   3735     return RXOR;
   3736 
   3737   // fold !(x cc y) -> (x !cc y)
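  // For example (illustrative only): (xor (setcc x, y, seteq), 1) becomes
  // (setcc x, y, setne) here, provided the inverted condition code is legal.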
   3738   if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
   3739     bool isInt = LHS.getValueType().isInteger();
   3740     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
   3741                                                isInt);
   3742 
   3743     if (!LegalOperations ||
   3744         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
   3745       switch (N0.getOpcode()) {
   3746       default:
   3747         llvm_unreachable("Unhandled SetCC Equivalent!");
   3748       case ISD::SETCC:
   3749         return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
   3750       case ISD::SELECT_CC:
   3751         return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
   3752                                N0.getOperand(3), NotCC);
   3753       }
   3754     }
   3755   }
   3756 
   3757   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
   3758   if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
   3759       N0.getNode()->hasOneUse() &&
   3760       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
   3761     SDValue V = N0.getOperand(0);
   3762     V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V,
   3763                     DAG.getConstant(1, V.getValueType()));
   3764     AddToWorkList(V.getNode());
   3765     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
   3766   }
   3767 
   3768   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
   3769   if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
   3770       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
   3771     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
   3772     if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
   3773       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
   3774       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
   3775       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
   3776       AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
   3777       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
   3778     }
   3779   }
   3780   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
   3781   if (N1C && N1C->isAllOnesValue() &&
   3782       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
   3783     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
   3784     if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
   3785       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
   3786       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
   3787       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
   3788       AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
   3789       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
   3790     }
   3791   }
   3792   // fold (xor (and x, y), y) -> (and (not x), y)
   3793   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
   3794       N0->getOperand(1) == N1) {
   3795     SDValue X = N0->getOperand(0);
   3796     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
   3797     AddToWorkList(NotX.getNode());
   3798     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
   3799   }
   3800   // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
   3801   if (N1C && N0.getOpcode() == ISD::XOR) {
   3802     ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
   3803     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   3804     if (N00C)
   3805       return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1),
   3806                          DAG.getConstant(N1C->getAPIntValue() ^
   3807                                          N00C->getAPIntValue(), VT));
   3808     if (N01C)
   3809       return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0),
   3810                          DAG.getConstant(N1C->getAPIntValue() ^
   3811                                          N01C->getAPIntValue(), VT));
   3812   }
   3813   // fold (xor x, x) -> 0
   3814   if (N0 == N1)
   3815     return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
   3816 
   3817   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
   3818   if (N0.getOpcode() == N1.getOpcode()) {
   3819     SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
   3820     if (Tmp.getNode()) return Tmp;
   3821   }
   3822 
   3823   // Simplify the expression using non-local knowledge.
   3824   if (!VT.isVector() &&
   3825       SimplifyDemandedBits(SDValue(N, 0)))
   3826     return SDValue(N, 0);
   3827 
   3828   return SDValue();
   3829 }
   3830 
   3831 /// visitShiftByConstant - Handle transforms common to the three shifts, when
   3832 /// the shift amount is a constant.
   3833 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
   3834   // We can't and shouldn't fold opaque constants.
   3835   if (Amt->isOpaque())
   3836     return SDValue();
   3837 
   3838   SDNode *LHS = N->getOperand(0).getNode();
   3839   if (!LHS->hasOneUse()) return SDValue();
   3840 
   3841   // We want to pull some binops through shifts, so that we have (and (shift))
   3842   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
   3843   // thing happens with address calculations, so it's important to canonicalize
   3844   // it.
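  // Illustrative sketch of the canonicalization: (shl (and x, 255), 2) becomes
  // (and (shl x, 2), 1020), i.e. the binop's constant is shifted along with x.
  // (The guards below restrict when this is actually done.)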
   3845   bool HighBitSet = false;  // Can we transform this if the high bit is set?
   3846 
   3847   switch (LHS->getOpcode()) {
   3848   default: return SDValue();
   3849   case ISD::OR:
   3850   case ISD::XOR:
   3851     HighBitSet = false; // We can only transform sra if the high bit is clear.
   3852     break;
   3853   case ISD::AND:
   3854     HighBitSet = true;  // We can only transform sra if the high bit is set.
   3855     break;
   3856   case ISD::ADD:
   3857     if (N->getOpcode() != ISD::SHL)
   3858       return SDValue(); // only shl(add) not sr[al](add).
   3859     HighBitSet = false; // We can only transform sra if the high bit is clear.
   3860     break;
   3861   }
   3862 
   3863   // We require the RHS of the binop to be a constant and not opaque as well.
   3864   ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
   3865   if (!BinOpCst || BinOpCst->isOpaque()) return SDValue();
   3866 
   3867   // FIXME: disable this unless the input to the binop is a shift by a constant.
   3868   // If it is not a shift, it pessimizes some common cases like:
   3869   //
   3870   //    void foo(int *X, int i) { X[i & 1235] = 1; }
   3871   //    int bar(int *X, int i) { return X[i & 255]; }
   3872   SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
   3873   if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
   3874        BinOpLHSVal->getOpcode() != ISD::SRA &&
   3875        BinOpLHSVal->getOpcode() != ISD::SRL) ||
   3876       !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
   3877     return SDValue();
   3878 
   3879   EVT VT = N->getValueType(0);
   3880 
   3881   // If this is a signed shift right, and the high bit is modified by the
    3882   // logical operation, do not perform the transformation. The HighBitSet
    3883   // flag records the value the constant's sign bit must have for the
    3884   // transformation to be valid for this binop.
   3885   if (N->getOpcode() == ISD::SRA) {
   3886     bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
   3887     if (BinOpRHSSignSet != HighBitSet)
   3888       return SDValue();
   3889   }
   3890 
   3891   if (!TLI.isDesirableToCommuteWithShift(LHS))
   3892     return SDValue();
   3893 
   3894   // Fold the constants, shifting the binop RHS by the shift amount.
   3895   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
   3896                                N->getValueType(0),
   3897                                LHS->getOperand(1), N->getOperand(1));
   3898   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
   3899 
   3900   // Create the new shift.
   3901   SDValue NewShift = DAG.getNode(N->getOpcode(),
   3902                                  SDLoc(LHS->getOperand(0)),
   3903                                  VT, LHS->getOperand(0), N->getOperand(1));
   3904 
   3905   // Create the new binop.
   3906   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
   3907 }
   3908 
   3909 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
   3910   assert(N->getOpcode() == ISD::TRUNCATE);
   3911   assert(N->getOperand(0).getOpcode() == ISD::AND);
   3912 
   3913   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
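  // For instance (illustrative): truncating (and i64 y, 255) to i32 becomes
  // (and (trunc y), 255), with the constant re-emitted in the narrow type.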
   3914   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
   3915     SDValue N01 = N->getOperand(0).getOperand(1);
   3916 
   3917     if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
   3918       EVT TruncVT = N->getValueType(0);
   3919       SDValue N00 = N->getOperand(0).getOperand(0);
   3920       APInt TruncC = N01C->getAPIntValue();
   3921       TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
   3922 
   3923       return DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
   3924                          DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00),
   3925                          DAG.getConstant(TruncC, TruncVT));
   3926     }
   3927   }
   3928 
   3929   return SDValue();
   3930 }
   3931 
   3932 SDValue DAGCombiner::visitRotate(SDNode *N) {
   3933   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
   3934   if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
   3935       N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
   3936     SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode());
   3937     if (NewOp1.getNode())
   3938       return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
   3939                          N->getOperand(0), NewOp1);
   3940   }
   3941   return SDValue();
   3942 }
   3943 
   3944 SDValue DAGCombiner::visitSHL(SDNode *N) {
   3945   SDValue N0 = N->getOperand(0);
   3946   SDValue N1 = N->getOperand(1);
   3947   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   3948   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   3949   EVT VT = N0.getValueType();
   3950   unsigned OpSizeInBits = VT.getScalarSizeInBits();
   3951 
   3952   // fold vector ops
   3953   if (VT.isVector()) {
   3954     SDValue FoldedVOp = SimplifyVBinOp(N);
   3955     if (FoldedVOp.getNode()) return FoldedVOp;
   3956 
   3957     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    3958     // If the setcc produces an all-ones true value, then:
   3959     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
   3960     if (N1CV && N1CV->isConstant()) {
   3961       if (N0.getOpcode() == ISD::AND) {
   3962         SDValue N00 = N0->getOperand(0);
   3963         SDValue N01 = N0->getOperand(1);
   3964         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
   3965 
   3966         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
   3967             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
   3968                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
   3969           SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV);
   3970           if (C.getNode())
   3971             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
   3972         }
   3973       } else {
   3974         N1C = isConstOrConstSplat(N1);
   3975       }
   3976     }
   3977   }
   3978 
   3979   // fold (shl c1, c2) -> c1<<c2
   3980   if (N0C && N1C)
   3981     return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
   3982   // fold (shl 0, x) -> 0
   3983   if (N0C && N0C->isNullValue())
   3984     return N0;
   3985   // fold (shl x, c >= size(x)) -> undef
   3986   if (N1C && N1C->getZExtValue() >= OpSizeInBits)
   3987     return DAG.getUNDEF(VT);
   3988   // fold (shl x, 0) -> x
   3989   if (N1C && N1C->isNullValue())
   3990     return N0;
   3991   // fold (shl undef, x) -> 0
   3992   if (N0.getOpcode() == ISD::UNDEF)
   3993     return DAG.getConstant(0, VT);
   3994   // if (shl x, c) is known to be zero, return 0
   3995   if (DAG.MaskedValueIsZero(SDValue(N, 0),
   3996                             APInt::getAllOnesValue(OpSizeInBits)))
   3997     return DAG.getConstant(0, VT);
   3998   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
   3999   if (N1.getOpcode() == ISD::TRUNCATE &&
   4000       N1.getOperand(0).getOpcode() == ISD::AND) {
   4001     SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
   4002     if (NewOp1.getNode())
   4003       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
   4004   }
   4005 
   4006   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
   4007     return SDValue(N, 0);
   4008 
   4009   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
   4010   if (N1C && N0.getOpcode() == ISD::SHL) {
   4011     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
   4012       uint64_t c1 = N0C1->getZExtValue();
   4013       uint64_t c2 = N1C->getZExtValue();
   4014       if (c1 + c2 >= OpSizeInBits)
   4015         return DAG.getConstant(0, VT);
   4016       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
   4017                          DAG.getConstant(c1 + c2, N1.getValueType()));
   4018     }
   4019   }
   4020 
   4021   // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
   4022   // For this to be valid, the second form must not preserve any of the bits
   4023   // that are shifted out by the inner shift in the first form.  This means
   4024   // the outer shift size must be >= the number of bits added by the ext.
   4025   // As a corollary, we don't care what kind of ext it is.
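  // Illustrative example (an assumption): with x: i16 zero-extended to i32,
  // c1 == 4 and c2 == 20, the outer shift (20) is at least the 16 bits added by
  // the extension, so (shl (zext (shl x, 4)), 20) can become (shl (zext x), 24).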
   4026   if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
   4027               N0.getOpcode() == ISD::ANY_EXTEND ||
   4028               N0.getOpcode() == ISD::SIGN_EXTEND) &&
   4029       N0.getOperand(0).getOpcode() == ISD::SHL) {
   4030     SDValue N0Op0 = N0.getOperand(0);
   4031     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
   4032       uint64_t c1 = N0Op0C1->getZExtValue();
   4033       uint64_t c2 = N1C->getZExtValue();
   4034       EVT InnerShiftVT = N0Op0.getValueType();
   4035       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
   4036       if (c2 >= OpSizeInBits - InnerShiftSize) {
   4037         if (c1 + c2 >= OpSizeInBits)
   4038           return DAG.getConstant(0, VT);
   4039         return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
   4040                            DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
   4041                                        N0Op0->getOperand(0)),
   4042                            DAG.getConstant(c1 + c2, N1.getValueType()));
   4043       }
   4044     }
   4045   }
   4046 
   4047   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
   4048   // Only fold this if the inner zext has no other uses to avoid increasing
   4049   // the total number of instructions.
   4050   if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
   4051       N0.getOperand(0).getOpcode() == ISD::SRL) {
   4052     SDValue N0Op0 = N0.getOperand(0);
   4053     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
   4054       uint64_t c1 = N0Op0C1->getZExtValue();
   4055       if (c1 < VT.getScalarSizeInBits()) {
   4056         uint64_t c2 = N1C->getZExtValue();
   4057         if (c1 == c2) {
   4058           SDValue NewOp0 = N0.getOperand(0);
   4059           EVT CountVT = NewOp0.getOperand(1).getValueType();
   4060           SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
   4061                                        NewOp0, DAG.getConstant(c2, CountVT));
   4062           AddToWorkList(NewSHL.getNode());
   4063           return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
   4064         }
   4065       }
   4066     }
   4067   }
   4068 
    4069   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
    4070   //                               (and (srl x, (sub c1, c2)), MASK)
   4071   // Only fold this if the inner shift has no other uses -- if it does, folding
   4072   // this will increase the total number of instructions.
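  // Example (illustrative): for i32 x with c1 == 8 and c2 == 3, this produces
  // (and (srl x, 5), 0x07FFFFF8), matching the original value bit-for-bit.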
   4073   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
   4074     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
   4075       uint64_t c1 = N0C1->getZExtValue();
   4076       if (c1 < OpSizeInBits) {
   4077         uint64_t c2 = N1C->getZExtValue();
   4078         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
   4079         SDValue Shift;
   4080         if (c2 > c1) {
   4081           Mask = Mask.shl(c2 - c1);
   4082           Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
   4083                               DAG.getConstant(c2 - c1, N1.getValueType()));
   4084         } else {
   4085           Mask = Mask.lshr(c1 - c2);
   4086           Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
   4087                               DAG.getConstant(c1 - c2, N1.getValueType()));
   4088         }
   4089         return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
   4090                            DAG.getConstant(Mask, VT));
   4091       }
   4092     }
   4093   }
   4094   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
   4095   if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
   4096     unsigned BitSize = VT.getScalarSizeInBits();
   4097     SDValue HiBitsMask =
   4098       DAG.getConstant(APInt::getHighBitsSet(BitSize,
   4099                                             BitSize - N1C->getZExtValue()), VT);
   4100     return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
   4101                        HiBitsMask);
   4102   }
   4103 
   4104   if (N1C) {
   4105     SDValue NewSHL = visitShiftByConstant(N, N1C);
   4106     if (NewSHL.getNode())
   4107       return NewSHL;
   4108   }
   4109 
   4110   return SDValue();
   4111 }
   4112 
   4113 SDValue DAGCombiner::visitSRA(SDNode *N) {
   4114   SDValue N0 = N->getOperand(0);
   4115   SDValue N1 = N->getOperand(1);
   4116   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   4117   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   4118   EVT VT = N0.getValueType();
   4119   unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
   4120 
   4121   // fold vector ops
   4122   if (VT.isVector()) {
   4123     SDValue FoldedVOp = SimplifyVBinOp(N);
   4124     if (FoldedVOp.getNode()) return FoldedVOp;
   4125 
   4126     N1C = isConstOrConstSplat(N1);
   4127   }
   4128 
    4129   // fold (sra c1, c2) -> c1 >>s c2
   4130   if (N0C && N1C)
   4131     return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
   4132   // fold (sra 0, x) -> 0
   4133   if (N0C && N0C->isNullValue())
   4134     return N0;
   4135   // fold (sra -1, x) -> -1
   4136   if (N0C && N0C->isAllOnesValue())
   4137     return N0;
    4138   // fold (sra x, c >= size(x)) -> undef
   4139   if (N1C && N1C->getZExtValue() >= OpSizeInBits)
   4140     return DAG.getUNDEF(VT);
   4141   // fold (sra x, 0) -> x
   4142   if (N1C && N1C->isNullValue())
   4143     return N0;
    4144   // fold (sra (shl x, c1), c1) -> sext_inreg, when the target supports
    4145   // sext_inreg of the corresponding narrower type.
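  // E.g. (illustrative): for i32 x, (sra (shl x, 24), 24) sign-extends the low
  // 8 bits of x, i.e. (sext_inreg x, i8).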
   4146   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
   4147     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
   4148     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
   4149     if (VT.isVector())
   4150       ExtVT = EVT::getVectorVT(*DAG.getContext(),
   4151                                ExtVT, VT.getVectorNumElements());
   4152     if ((!LegalOperations ||
   4153          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
   4154       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
   4155                          N0.getOperand(0), DAG.getValueType(ExtVT));
   4156   }
   4157 
   4158   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
   4159   if (N1C && N0.getOpcode() == ISD::SRA) {
   4160     if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) {
   4161       unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
   4162       if (Sum >= OpSizeInBits)
   4163         Sum = OpSizeInBits - 1;
   4164       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
   4165                          DAG.getConstant(Sum, N1.getValueType()));
   4166     }
   4167   }
   4168 
    4169   // fold (sra (shl X, m), (sub result_size, n))
    4170   // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
    4171   // result_size - n != m.
    4172   // If truncation is free for the target, the narrower sext(trunc(srl)) form
    4173   // is likely to result in better code.
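  // Illustrative example (an assumption about the target): with X: i64, m == 24
  // and a shift amount of 32, (sra (shl X, 24), 32) becomes
  // (sext (trunc:i32 (srl X, 8))), which tends to lower better when the
  // truncation to i32 is free.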
   4174   if (N0.getOpcode() == ISD::SHL && N1C) {
    4175     // Get the two constants of the shifts, CN0 = m, CN = n.
   4176     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
   4177     if (N01C) {
   4178       LLVMContext &Ctx = *DAG.getContext();
   4179       // Determine what the truncate's result bitsize and type would be.
   4180       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
   4181 
   4182       if (VT.isVector())
   4183         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
   4184 
   4185       // Determine the residual right-shift amount.
   4186       signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
   4187 
   4188       // If the shift is not a no-op (in which case this should be just a sign
    4189       // extend already), the truncated-to type is legal, sign_extend is legal
   4190       // on that type, and the truncate to that type is both legal and free,
   4191       // perform the transform.
   4192       if ((ShiftAmt > 0) &&
   4193           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
   4194           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
   4195           TLI.isTruncateFree(VT, TruncVT)) {
   4196 
   4197           SDValue Amt = DAG.getConstant(ShiftAmt,
   4198               getShiftAmountTy(N0.getOperand(0).getValueType()));
   4199           SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT,
   4200                                       N0.getOperand(0), Amt);
   4201           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT,
   4202                                       Shift);
   4203           return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N),
   4204                              N->getValueType(0), Trunc);
   4205       }
   4206     }
   4207   }
   4208 
   4209   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
   4210   if (N1.getOpcode() == ISD::TRUNCATE &&
   4211       N1.getOperand(0).getOpcode() == ISD::AND) {
   4212     SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
   4213     if (NewOp1.getNode())
   4214       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
   4215   }
   4216 
   4217   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
   4218   //      if c1 is equal to the number of bits the trunc removes
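  // Example (illustrative): truncating i64 to i32 removes 32 bits, so
  // (sra (trunc (srl x, 32)), 3) becomes (trunc (sra x, 35)).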
   4219   if (N0.getOpcode() == ISD::TRUNCATE &&
   4220       (N0.getOperand(0).getOpcode() == ISD::SRL ||
   4221        N0.getOperand(0).getOpcode() == ISD::SRA) &&
   4222       N0.getOperand(0).hasOneUse() &&
   4223       N0.getOperand(0).getOperand(1).hasOneUse() &&
   4224       N1C) {
   4225     SDValue N0Op0 = N0.getOperand(0);
   4226     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
   4227       unsigned LargeShiftVal = LargeShift->getZExtValue();
   4228       EVT LargeVT = N0Op0.getValueType();
   4229 
   4230       if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
   4231         SDValue Amt =
   4232           DAG.getConstant(LargeShiftVal + N1C->getZExtValue(),
   4233                           getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
   4234         SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
   4235                                   N0Op0.getOperand(0), Amt);
   4236         return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
   4237       }
   4238     }
   4239   }
   4240 
   4241   // Simplify, based on bits shifted out of the LHS.
   4242   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
   4243     return SDValue(N, 0);
   4244 
   4245 
   4246   // If the sign bit is known to be zero, switch this to a SRL.
   4247   if (DAG.SignBitIsZero(N0))
   4248     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
   4249 
   4250   if (N1C) {
   4251     SDValue NewSRA = visitShiftByConstant(N, N1C);
   4252     if (NewSRA.getNode())
   4253       return NewSRA;
   4254   }
   4255 
   4256   return SDValue();
   4257 }
   4258 
   4259 SDValue DAGCombiner::visitSRL(SDNode *N) {
   4260   SDValue N0 = N->getOperand(0);
   4261   SDValue N1 = N->getOperand(1);
   4262   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   4263   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   4264   EVT VT = N0.getValueType();
   4265   unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
   4266 
   4267   // fold vector ops
   4268   if (VT.isVector()) {
   4269     SDValue FoldedVOp = SimplifyVBinOp(N);
   4270     if (FoldedVOp.getNode()) return FoldedVOp;
   4271 
   4272     N1C = isConstOrConstSplat(N1);
   4273   }
   4274 
   4275   // fold (srl c1, c2) -> c1 >>u c2
   4276   if (N0C && N1C)
   4277     return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
   4278   // fold (srl 0, x) -> 0
   4279   if (N0C && N0C->isNullValue())
   4280     return N0;
   4281   // fold (srl x, c >= size(x)) -> undef
   4282   if (N1C && N1C->getZExtValue() >= OpSizeInBits)
   4283     return DAG.getUNDEF(VT);
   4284   // fold (srl x, 0) -> x
   4285   if (N1C && N1C->isNullValue())
   4286     return N0;
   4287   // if (srl x, c) is known to be zero, return 0
   4288   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
   4289                                    APInt::getAllOnesValue(OpSizeInBits)))
   4290     return DAG.getConstant(0, VT);
   4291 
   4292   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
   4293   if (N1C && N0.getOpcode() == ISD::SRL) {
   4294     if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
   4295       uint64_t c1 = N01C->getZExtValue();
   4296       uint64_t c2 = N1C->getZExtValue();
   4297       if (c1 + c2 >= OpSizeInBits)
   4298         return DAG.getConstant(0, VT);
   4299       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
   4300                          DAG.getConstant(c1 + c2, N1.getValueType()));
   4301     }
   4302   }
   4303 
   4304   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
   4305   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
   4306       N0.getOperand(0).getOpcode() == ISD::SRL &&
   4307       isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
   4308     uint64_t c1 =
   4309       cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
   4310     uint64_t c2 = N1C->getZExtValue();
   4311     EVT InnerShiftVT = N0.getOperand(0).getValueType();
   4312     EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
   4313     uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
    4314     // This is only valid if OpSizeInBits + c1 == the size of the inner shift.
   4315     if (c1 + OpSizeInBits == InnerShiftSize) {
   4316       if (c1 + c2 >= InnerShiftSize)
   4317         return DAG.getConstant(0, VT);
   4318       return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT,
   4319                          DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT,
   4320                                      N0.getOperand(0)->getOperand(0),
   4321                                      DAG.getConstant(c1 + c2, ShiftCountVT)));
   4322     }
   4323   }
   4324 
   4325   // fold (srl (shl x, c), c) -> (and x, cst2)
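  // Example (illustrative): for i32 x, (srl (shl x, 8), 8) keeps only the low
  // 24 bits and becomes (and x, 0x00FFFFFF).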
   4326   if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
   4327     unsigned BitSize = N0.getScalarValueSizeInBits();
   4328     if (BitSize <= 64) {
   4329       uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
   4330       return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
   4331                          DAG.getConstant(~0ULL >> ShAmt, VT));
   4332     }
   4333   }
   4334 
   4335   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
   4336   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
   4337     // Shifting in all undef bits?
   4338     EVT SmallVT = N0.getOperand(0).getValueType();
   4339     unsigned BitSize = SmallVT.getScalarSizeInBits();
   4340     if (N1C->getZExtValue() >= BitSize)
   4341       return DAG.getUNDEF(VT);
   4342 
   4343     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
   4344       uint64_t ShiftAmt = N1C->getZExtValue();
   4345       SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT,
   4346                                        N0.getOperand(0),
   4347                           DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
   4348       AddToWorkList(SmallShift.getNode());
   4349       APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
   4350       return DAG.getNode(ISD::AND, SDLoc(N), VT,
   4351                          DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
   4352                          DAG.getConstant(Mask, VT));
   4353     }
   4354   }
   4355 
   4356   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
   4357   // bit, which is unmodified by sra.
   4358   if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
   4359     if (N0.getOpcode() == ISD::SRA)
   4360       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
   4361   }
   4362 
    4363   // fold (srl (ctlz x), log2(size(x))) -> x  iff x has one bit set (the low bit).
   4364   if (N1C && N0.getOpcode() == ISD::CTLZ &&
   4365       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
   4366     APInt KnownZero, KnownOne;
   4367     DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);
   4368 
   4369     // If any of the input bits are KnownOne, then the input couldn't be all
   4370     // zeros, thus the result of the srl will always be zero.
   4371     if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
   4372 
    4373     // If all of the bits input to the ctlz node are known to be zero, then
   4374     // the result of the ctlz is "32" and the result of the shift is one.
   4375     APInt UnknownBits = ~KnownZero;
   4376     if (UnknownBits == 0) return DAG.getConstant(1, VT);
   4377 
   4378     // Otherwise, check to see if there is exactly one bit input to the ctlz.
   4379     if ((UnknownBits & (UnknownBits - 1)) == 0) {
    4380       // Okay, we know that only the single bit specified by UnknownBits
   4381       // could be set on input to the CTLZ node. If this bit is set, the SRL
   4382       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
   4383       // to an SRL/XOR pair, which is likely to simplify more.
   4384       unsigned ShAmt = UnknownBits.countTrailingZeros();
   4385       SDValue Op = N0.getOperand(0);
   4386 
   4387       if (ShAmt) {
   4388         Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op,
   4389                   DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
   4390         AddToWorkList(Op.getNode());
   4391       }
   4392 
   4393       return DAG.getNode(ISD::XOR, SDLoc(N), VT,
   4394                          Op, DAG.getConstant(1, VT));
   4395     }
   4396   }
   4397 
   4398   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
   4399   if (N1.getOpcode() == ISD::TRUNCATE &&
   4400       N1.getOperand(0).getOpcode() == ISD::AND) {
   4401     SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
   4402     if (NewOp1.getNode())
   4403       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
   4404   }
   4405 
   4406   // fold operands of srl based on knowledge that the low bits are not
   4407   // demanded.
   4408   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
   4409     return SDValue(N, 0);
   4410 
   4411   if (N1C) {
   4412     SDValue NewSRL = visitShiftByConstant(N, N1C);
   4413     if (NewSRL.getNode())
   4414       return NewSRL;
   4415   }
   4416 
   4417   // Attempt to convert a srl of a load into a narrower zero-extending load.
   4418   SDValue NarrowLoad = ReduceLoadWidth(N);
   4419   if (NarrowLoad.getNode())
   4420     return NarrowLoad;
   4421 
   4422   // Here is a common situation. We want to optimize:
   4423   //
   4424   //   %a = ...
   4425   //   %b = and i32 %a, 2
   4426   //   %c = srl i32 %b, 1
   4427   //   brcond i32 %c ...
   4428   //
   4429   // into
   4430   //
   4431   //   %a = ...
   4432   //   %b = and %a, 2
   4433   //   %c = setcc eq %b, 0
   4434   //   brcond %c ...
   4435   //
    4436   // However, after the source operand of the SRL is optimized into an AND, the SRL
   4437   // itself may not be optimized further. Look for it and add the BRCOND into
   4438   // the worklist.
   4439   if (N->hasOneUse()) {
   4440     SDNode *Use = *N->use_begin();
   4441     if (Use->getOpcode() == ISD::BRCOND)
   4442       AddToWorkList(Use);
   4443     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
    4444       // Also look past the truncate.
   4445       Use = *Use->use_begin();
   4446       if (Use->getOpcode() == ISD::BRCOND)
   4447         AddToWorkList(Use);
   4448     }
   4449   }
   4450 
   4451   return SDValue();
   4452 }
   4453 
   4454 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
   4455   SDValue N0 = N->getOperand(0);
   4456   EVT VT = N->getValueType(0);
   4457 
   4458   // fold (ctlz c1) -> c2
   4459   if (isa<ConstantSDNode>(N0))
   4460     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
   4461   return SDValue();
   4462 }
   4463 
   4464 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
   4465   SDValue N0 = N->getOperand(0);
   4466   EVT VT = N->getValueType(0);
   4467 
   4468   // fold (ctlz_zero_undef c1) -> c2
   4469   if (isa<ConstantSDNode>(N0))
   4470     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
   4471   return SDValue();
   4472 }
   4473 
   4474 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
   4475   SDValue N0 = N->getOperand(0);
   4476   EVT VT = N->getValueType(0);
   4477 
   4478   // fold (cttz c1) -> c2
   4479   if (isa<ConstantSDNode>(N0))
   4480     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
   4481   return SDValue();
   4482 }
   4483 
   4484 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
   4485   SDValue N0 = N->getOperand(0);
   4486   EVT VT = N->getValueType(0);
   4487 
   4488   // fold (cttz_zero_undef c1) -> c2
   4489   if (isa<ConstantSDNode>(N0))
   4490     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
   4491   return SDValue();
   4492 }
   4493 
   4494 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
   4495   SDValue N0 = N->getOperand(0);
   4496   EVT VT = N->getValueType(0);
   4497 
   4498   // fold (ctpop c1) -> c2
   4499   if (isa<ConstantSDNode>(N0))
   4500     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
   4501   return SDValue();
   4502 }
   4503 
   4504 SDValue DAGCombiner::visitSELECT(SDNode *N) {
   4505   SDValue N0 = N->getOperand(0);
   4506   SDValue N1 = N->getOperand(1);
   4507   SDValue N2 = N->getOperand(2);
   4508   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   4509   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   4510   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
   4511   EVT VT = N->getValueType(0);
   4512   EVT VT0 = N0.getValueType();
   4513 
   4514   // fold (select C, X, X) -> X
   4515   if (N1 == N2)
   4516     return N1;
   4517   // fold (select true, X, Y) -> X
   4518   if (N0C && !N0C->isNullValue())
   4519     return N1;
   4520   // fold (select false, X, Y) -> Y
   4521   if (N0C && N0C->isNullValue())
   4522     return N2;
   4523   // fold (select C, 1, X) -> (or C, X)
   4524   if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
   4525     return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
   4526   // fold (select C, 0, 1) -> (xor C, 1)
    4527   // We can't do this reliably if integer-based booleans have different contents
    4528   // from floating-point-based booleans. This is because we can't tell whether we
   4529   // have an integer-based boolean or a floating-point-based boolean unless we
   4530   // can find the SETCC that produced it and inspect its operands. This is
   4531   // fairly easy if C is the SETCC node, but it can potentially be
   4532   // undiscoverable (or not reasonably discoverable). For example, it could be
   4533   // in another basic block or it could require searching a complicated
   4534   // expression.
   4535   if (VT.isInteger() &&
   4536       (VT0 == MVT::i1 || (VT0.isInteger() &&
   4537                           TLI.getBooleanContents(false, false) ==
   4538                               TLI.getBooleanContents(false, true) &&
   4539                           TLI.getBooleanContents(false, false) ==
   4540                               TargetLowering::ZeroOrOneBooleanContent)) &&
   4541       N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
   4542     SDValue XORNode;
   4543     if (VT == VT0)
   4544       return DAG.getNode(ISD::XOR, SDLoc(N), VT0,
   4545                          N0, DAG.getConstant(1, VT0));
   4546     XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0,
   4547                           N0, DAG.getConstant(1, VT0));
   4548     AddToWorkList(XORNode.getNode());
   4549     if (VT.bitsGT(VT0))
   4550       return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
   4551     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
   4552   }
   4553   // fold (select C, 0, X) -> (and (not C), X)
   4554   if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
   4555     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
   4556     AddToWorkList(NOTNode.getNode());
   4557     return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
   4558   }
   4559   // fold (select C, X, 1) -> (or (not C), X)
   4560   if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
   4561     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
   4562     AddToWorkList(NOTNode.getNode());
   4563     return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
   4564   }
   4565   // fold (select C, X, 0) -> (and C, X)
   4566   if (VT == MVT::i1 && N2C && N2C->isNullValue())
   4567     return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
   4568   // fold (select X, X, Y) -> (or X, Y)
   4569   // fold (select X, 1, Y) -> (or X, Y)
   4570   if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
   4571     return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
   4572   // fold (select X, Y, X) -> (and X, Y)
   4573   // fold (select X, Y, 0) -> (and X, Y)
   4574   if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
   4575     return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
   4576 
   4577   // If we can fold this based on the true/false value, do so.
   4578   if (SimplifySelectOps(N, N1, N2))
   4579     return SDValue(N, 0);  // Don't revisit N.
   4580 
   4581   // fold selects based on a setcc into other things, such as min/max/abs
   4582   if (N0.getOpcode() == ISD::SETCC) {
   4583     if ((!LegalOperations &&
   4584          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
    4585         TLI.isOperationLegal(ISD::SELECT_CC, VT))
   4586       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
   4587                          N0.getOperand(0), N0.getOperand(1),
   4588                          N1, N2, N0.getOperand(2));
   4589     return SimplifySelect(SDLoc(N), N0, N1, N2);
   4590   }
   4591 
   4592   return SDValue();
   4593 }
   4594 
   4595 static
   4596 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
   4597   SDLoc DL(N);
   4598   EVT LoVT, HiVT;
   4599   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
   4600 
   4601   // Split the inputs.
   4602   SDValue Lo, Hi, LL, LH, RL, RH;
   4603   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
   4604   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
   4605 
   4606   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
   4607   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
   4608 
   4609   return std::make_pair(Lo, Hi);
   4610 }
   4611 
    4612 // This function assumes all the vselect's arguments are CONCAT_VECTORS
   4613 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
   4614 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
   4615   SDLoc dl(N);
   4616   SDValue Cond = N->getOperand(0);
   4617   SDValue LHS = N->getOperand(1);
   4618   SDValue RHS = N->getOperand(2);
   4619   MVT VT = N->getSimpleValueType(0);
   4620   int NumElems = VT.getVectorNumElements();
   4621   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
   4622          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
   4623          Cond.getOpcode() == ISD::BUILD_VECTOR);
   4624 
   4625   // We're sure we have an even number of elements due to the
   4626   // concat_vectors we have as arguments to vselect.
    4627   // Skip BV elements until we find one that's not an UNDEF.
    4628   // After we find a non-UNDEF element, keep looping until we get to half the
    4629   // length of the BV and check that all the non-undef nodes are the same.
   4630   ConstantSDNode *BottomHalf = nullptr;
   4631   for (int i = 0; i < NumElems / 2; ++i) {
   4632     if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
   4633       continue;
   4634 
   4635     if (BottomHalf == nullptr)
   4636       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
   4637     else if (Cond->getOperand(i).getNode() != BottomHalf)
   4638       return SDValue();
   4639   }
   4640 
   4641   // Do the same for the second half of the BuildVector
   4642   ConstantSDNode *TopHalf = nullptr;
   4643   for (int i = NumElems / 2; i < NumElems; ++i) {
   4644     if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
   4645       continue;
   4646 
   4647     if (TopHalf == nullptr)
   4648       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
   4649     else if (Cond->getOperand(i).getNode() != TopHalf)
   4650       return SDValue();
   4651   }
   4652 
   4653   assert(TopHalf && BottomHalf &&
   4654          "One half of the selector was all UNDEFs and the other was all the "
   4655          "same value. This should have been addressed before this function.");
   4656   return DAG.getNode(
   4657       ISD::CONCAT_VECTORS, dl, VT,
   4658       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
   4659       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
   4660 }
   4661 
   4662 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
   4663   SDValue N0 = N->getOperand(0);
   4664   SDValue N1 = N->getOperand(1);
   4665   SDValue N2 = N->getOperand(2);
   4666   SDLoc DL(N);
   4667 
   4668   // Canonicalize integer abs.
   4669   // vselect (setg[te] X,  0),  X, -X ->
   4670   // vselect (setgt    X, -1),  X, -X ->
   4671   // vselect (setl[te] X,  0), -X,  X ->
   4672   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
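  // Sketch (illustrative): for a v4i32 X this yields Y = (sra X, 31) followed by
  // (xor (add X, Y), Y), the usual branch-free absolute-value sequence.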
   4673   if (N0.getOpcode() == ISD::SETCC) {
   4674     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
   4675     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
   4676     bool isAbs = false;
   4677     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
   4678 
   4679     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
   4680          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
   4681         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
   4682       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
   4683     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
   4684              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
   4685       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
   4686 
   4687     if (isAbs) {
   4688       EVT VT = LHS.getValueType();
   4689       SDValue Shift = DAG.getNode(
   4690           ISD::SRA, DL, VT, LHS,
   4691           DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT));
   4692       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
   4693       AddToWorkList(Shift.getNode());
   4694       AddToWorkList(Add.getNode());
   4695       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
   4696     }
   4697   }
   4698 
   4699   // If the VSELECT result requires splitting and the mask is provided by a
   4700   // SETCC, then split both nodes and its operands before legalization. This
   4701   // prevents the type legalizer from unrolling SETCC into scalar comparisons
   4702   // and enables future optimizations (e.g. min/max pattern matching on X86).
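  // For example (illustrative, target-dependent): a v16i32 vselect whose mask
  // comes from a v16i32 setcc may be split here into two v8i32 halves, each
  // with its own setcc, instead of letting the type legalizer scalarize it.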
   4703   if (N0.getOpcode() == ISD::SETCC) {
   4704     EVT VT = N->getValueType(0);
   4705 
   4706     // Check if any splitting is required.
   4707     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
   4708         TargetLowering::TypeSplitVector)
   4709       return SDValue();
   4710 
   4711     SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
   4712     std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
   4713     std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
   4714     std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
   4715 
   4716     Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
   4717     Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
   4718 
   4719     // Add the new VSELECT nodes to the work list in case they need to be split
   4720     // again.
   4721     AddToWorkList(Lo.getNode());
   4722     AddToWorkList(Hi.getNode());
   4723 
   4724     return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
   4725   }
   4726 
   4727   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
   4728   if (ISD::isBuildVectorAllOnes(N0.getNode()))
   4729     return N1;
   4730   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
   4731   if (ISD::isBuildVectorAllZeros(N0.getNode()))
   4732     return N2;
   4733 
    4734   // ConvertSelectToConcatVector assumes that both of the above checks for
    4735   // (vselect (build_vector all{ones,zeros}) ...) have already been made
    4736   // and addressed.
   4737   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
   4738       N2.getOpcode() == ISD::CONCAT_VECTORS &&
   4739       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
   4740     SDValue CV = ConvertSelectToConcatVector(N, DAG);
   4741     if (CV.getNode())
   4742       return CV;
   4743   }
   4744 
   4745   return SDValue();
   4746 }
   4747 
   4748 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
   4749   SDValue N0 = N->getOperand(0);
   4750   SDValue N1 = N->getOperand(1);
   4751   SDValue N2 = N->getOperand(2);
   4752   SDValue N3 = N->getOperand(3);
   4753   SDValue N4 = N->getOperand(4);
   4754   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
   4755 
   4756   // fold select_cc lhs, rhs, x, x, cc -> x
   4757   if (N2 == N3)
   4758     return N2;
   4759 
   4760   // Determine if the condition we're dealing with is constant
   4761   SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
   4762                               N0, N1, CC, SDLoc(N), false);
   4763   if (SCC.getNode()) {
   4764     AddToWorkList(SCC.getNode());
   4765 
   4766     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
   4767       if (!SCCC->isNullValue())
   4768         return N2;    // cond always true -> true val
   4769       else
   4770         return N3;    // cond always false -> false val
   4771     }
   4772 
   4773     // Fold to a simpler select_cc
   4774     if (SCC.getOpcode() == ISD::SETCC)
   4775       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
   4776                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
   4777                          SCC.getOperand(2));
   4778   }
   4779 
   4780   // If we can fold this based on the true/false value, do so.
   4781   if (SimplifySelectOps(N, N2, N3))
   4782     return SDValue(N, 0);  // Don't revisit N.
   4783 
   4784   // fold select_cc into other things, such as min/max/abs
   4785   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
   4786 }
   4787 
   4788 SDValue DAGCombiner::visitSETCC(SDNode *N) {
   4789   return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
   4790                        cast<CondCodeSDNode>(N->getOperand(2))->get(),
   4791                        SDLoc(N));
   4792 }
   4793 
   4794 // tryToFoldExtendOfConstant - Try to fold a sext/zext/aext
   4795 // dag node into a ConstantSDNode or a build_vector of constants.
   4796 // This function is called by the DAGCombiner when visiting sext/zext/aext
   4797 // dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
   4798 // Vector extends are not folded if operations are legal; this is to
   4799 // avoid introducing illegal build_vector dag nodes.
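// Illustrative example (not from the code below): extending a constant
// (build_vector <i8 255, i8 1>) to v2i32 folds to (build_vector <i32 255, i32 1>)
// for zext and to (build_vector <i32 -1, i32 1>) for sext.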
   4800 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
   4801                                          SelectionDAG &DAG, bool LegalTypes,
   4802                                          bool LegalOperations) {
   4803   unsigned Opcode = N->getOpcode();
   4804   SDValue N0 = N->getOperand(0);
   4805   EVT VT = N->getValueType(0);
   4806 
   4807   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
   4808          Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!");
   4809 
   4810   // fold (sext c1) -> c1
   4811   // fold (zext c1) -> c1
   4812   // fold (aext c1) -> c1
   4813   if (isa<ConstantSDNode>(N0))
   4814     return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
   4815 
    4816   // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
    4817   // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
    4818   // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
   4819   EVT SVT = VT.getScalarType();
   4820   if (!(VT.isVector() &&
   4821       (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
   4822       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
   4823     return nullptr;
   4824 
   4825   // We can fold this node into a build_vector.
   4826   unsigned VTBits = SVT.getSizeInBits();
   4827   unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
   4828   unsigned ShAmt = VTBits - EVTBits;
   4829   SmallVector<SDValue, 8> Elts;
   4830   unsigned NumElts = N0->getNumOperands();
   4831   SDLoc DL(N);
   4832 
   4833   for (unsigned i=0; i != NumElts; ++i) {
   4834     SDValue Op = N0->getOperand(i);
   4835     if (Op->getOpcode() == ISD::UNDEF) {
   4836       Elts.push_back(DAG.getUNDEF(SVT));
   4837       continue;
   4838     }
   4839 
   4840     ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
   4841     const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
   4842     if (Opcode == ISD::SIGN_EXTEND)
   4843       Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
   4844                                      SVT));
   4845     else
   4846       Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(),
   4847                                      SVT));
   4848   }
   4849 
   4850   return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode();
   4851 }
   4852 
    4853 // ExtendUsesToFormExtLoad - Try to extend uses of a load to enable this:
    4854 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
    4855 // transformation. Returns true if the extensions are possible and the
    4856 // above-mentioned transformation is profitable.
   4857 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
   4858                                     unsigned ExtOpc,
   4859                                     SmallVectorImpl<SDNode *> &ExtendNodes,
   4860                                     const TargetLowering &TLI) {
   4861   bool HasCopyToRegUses = false;
   4862   bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
   4863   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
   4864                             UE = N0.getNode()->use_end();
   4865        UI != UE; ++UI) {
   4866     SDNode *User = *UI;
   4867     if (User == N)
   4868       continue;
   4869     if (UI.getUse().getResNo() != N0.getResNo())
   4870       continue;
   4871     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
   4872     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
   4873       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
   4874       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
   4875         // Sign bits will be lost after a zext.
   4876         return false;
   4877       bool Add = false;
   4878       for (unsigned i = 0; i != 2; ++i) {
   4879         SDValue UseOp = User->getOperand(i);
   4880         if (UseOp == N0)
   4881           continue;
   4882         if (!isa<ConstantSDNode>(UseOp))
   4883           return false;
   4884         Add = true;
   4885       }
   4886       if (Add)
   4887         ExtendNodes.push_back(User);
   4888       continue;
   4889     }
   4890     // If truncates aren't free and there are users we can't
   4891     // extend, it isn't worthwhile.
   4892     if (!isTruncFree)
   4893       return false;
   4894     // Remember if this value is live-out.
   4895     if (User->getOpcode() == ISD::CopyToReg)
   4896       HasCopyToRegUses = true;
   4897   }
   4898 
   4899   if (HasCopyToRegUses) {
   4900     bool BothLiveOut = false;
   4901     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
   4902          UI != UE; ++UI) {
   4903       SDUse &Use = UI.getUse();
   4904       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
   4905         BothLiveOut = true;
   4906         break;
   4907       }
   4908     }
   4909     if (BothLiveOut)
   4910       // Both unextended and extended values are live out. There had better be
   4911       // a good reason for the transformation.
    4912       return !ExtendNodes.empty();
   4913   }
   4914   return true;
   4915 }
   4916 
   4917 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
   4918                                   SDValue Trunc, SDValue ExtLoad, SDLoc DL,
   4919                                   ISD::NodeType ExtType) {
   4920   // Extend SetCC uses if necessary.
   4921   for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
   4922     SDNode *SetCC = SetCCs[i];
   4923     SmallVector<SDValue, 4> Ops;
   4924 
   4925     for (unsigned j = 0; j != 2; ++j) {
   4926       SDValue SOp = SetCC->getOperand(j);
   4927       if (SOp == Trunc)
   4928         Ops.push_back(ExtLoad);
   4929       else
   4930         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
   4931     }
   4932 
   4933     Ops.push_back(SetCC->getOperand(2));
   4934     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
   4935   }
   4936 }
   4937 
   4938 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
   4939   SDValue N0 = N->getOperand(0);
   4940   EVT VT = N->getValueType(0);
   4941 
   4942   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
   4943                                               LegalOperations))
   4944     return SDValue(Res, 0);
   4945 
   4946   // fold (sext (sext x)) -> (sext x)
   4947   // fold (sext (aext x)) -> (sext x)
   4948   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
   4949     return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
   4950                        N0.getOperand(0));
   4951 
   4952   if (N0.getOpcode() == ISD::TRUNCATE) {
   4953     // fold (sext (truncate (load x))) -> (sext (smaller load x))
   4954     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
   4955     SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
   4956     if (NarrowLoad.getNode()) {
   4957       SDNode* oye = N0.getNode()->getOperand(0).getNode();
   4958       if (NarrowLoad.getNode() != N0.getNode()) {
   4959         CombineTo(N0.getNode(), NarrowLoad);
   4960         // CombineTo deleted the truncate, if needed, but not what's under it.
   4961         AddToWorkList(oye);
   4962       }
   4963       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   4964     }
   4965 
   4966     // See if the value being truncated is already sign extended.  If so, just
   4967     // eliminate the trunc/sext pair.
   4968     SDValue Op = N0.getOperand(0);
   4969     unsigned OpBits   = Op.getValueType().getScalarType().getSizeInBits();
   4970     unsigned MidBits  = N0.getValueType().getScalarType().getSizeInBits();
   4971     unsigned DestBits = VT.getScalarType().getSizeInBits();
   4972     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
   4973 
   4974     if (OpBits == DestBits) {
   4975       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
    4976       // bits, it is already sign extended enough; use Op directly.
   4977       if (NumSignBits > DestBits-MidBits)
   4978         return Op;
   4979     } else if (OpBits < DestBits) {
   4980       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
   4981       // bits, just sext from i32.
   4982       if (NumSignBits > OpBits-MidBits)
   4983         return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
   4984     } else {
   4985       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
   4986       // bits, just truncate to i32.
   4987       if (NumSignBits > OpBits-MidBits)
   4988         return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
   4989     }
   4990 
   4991     // fold (sext (truncate x)) -> (sextinreg x).
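             // An illustrative instance (widths invented for this comment):
             //   (sext i32 (truncate i8 X:i32)) -> (sext_in_reg X, i8)
             // with X first any-extended or truncated below if its width differs
             // from the result width.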
   4992     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
   4993                                                  N0.getValueType())) {
   4994       if (OpBits < DestBits)
   4995         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
   4996       else if (OpBits > DestBits)
   4997         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
   4998       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
   4999                          DAG.getValueType(N0.getValueType()));
   5000     }
   5001   }
   5002 
   5003   // fold (sext (load x)) -> (sext (truncate (sextload x)))
   5004   // None of the supported targets knows how to perform load and sign extend
   5005   // on vectors in one instruction.  We only perform this transformation on
   5006   // scalars.
   5007   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
   5008       ISD::isUNINDEXEDLoad(N0.getNode()) &&
   5009       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
   5010        TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
   5011     bool DoXform = true;
   5012     SmallVector<SDNode*, 4> SetCCs;
   5013     if (!N0.hasOneUse())
   5014       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
   5015     if (DoXform) {
   5016       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   5017       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
   5018                                        LN0->getChain(),
   5019                                        LN0->getBasePtr(), N0.getValueType(),
   5020                                        LN0->getMemOperand());
   5021       CombineTo(N, ExtLoad);
   5022       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
   5023                                   N0.getValueType(), ExtLoad);
   5024       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
   5025       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
   5026                       ISD::SIGN_EXTEND);
   5027       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   5028     }
   5029   }
   5030 
   5031   // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
   5032   // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
   5033   if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
   5034       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
   5035     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   5036     EVT MemVT = LN0->getMemoryVT();
   5037     if ((!LegalOperations && !LN0->isVolatile()) ||
   5038         TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
   5039       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
   5040                                        LN0->getChain(),
   5041                                        LN0->getBasePtr(), MemVT,
   5042                                        LN0->getMemOperand());
   5043       CombineTo(N, ExtLoad);
   5044       CombineTo(N0.getNode(),
   5045                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
   5046                             N0.getValueType(), ExtLoad),
   5047                 ExtLoad.getValue(1));
   5048       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   5049     }
   5050   }
   5051 
   5052   // fold (sext (and/or/xor (load x), cst)) ->
   5053   //      (and/or/xor (sextload x), (sext cst))
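           // A hedged example (i8/i32 picked only for illustration):
           //   (sext i32 (and (load i8 p), 15)) -> (and (sextload i8->i32 p), 15)
           // The constant is sign-extended to the wide type, which is what makes
           // the rewrite below correct for and/or/xor alike.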
   5054   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
   5055        N0.getOpcode() == ISD::XOR) &&
   5056       isa<LoadSDNode>(N0.getOperand(0)) &&
   5057       N0.getOperand(1).getOpcode() == ISD::Constant &&
   5058       TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) &&
   5059       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
   5060     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
   5061     if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
   5062       bool DoXform = true;
   5063       SmallVector<SDNode*, 4> SetCCs;
   5064       if (!N0.hasOneUse())
   5065         DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
   5066                                           SetCCs, TLI);
   5067       if (DoXform) {
   5068         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
   5069                                          LN0->getChain(), LN0->getBasePtr(),
   5070                                          LN0->getMemoryVT(),
   5071                                          LN0->getMemOperand());
   5072         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
   5073         Mask = Mask.sext(VT.getSizeInBits());
   5074         SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
   5075                                   ExtLoad, DAG.getConstant(Mask, VT));
   5076         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
   5077                                     SDLoc(N0.getOperand(0)),
   5078                                     N0.getOperand(0).getValueType(), ExtLoad);
   5079         CombineTo(N, And);
   5080         CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
   5081         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
   5082                         ISD::SIGN_EXTEND);
   5083         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   5084       }
   5085     }
   5086   }
   5087 
   5088   if (N0.getOpcode() == ISD::SETCC) {
   5089     EVT N0VT = N0.getOperand(0).getValueType();
   5090     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
   5091     // Only do this before legalize for now.
   5092     if (VT.isVector() && !LegalOperations &&
   5093         TLI.getBooleanContents(N0VT) ==
   5094             TargetLowering::ZeroOrNegativeOneBooleanContent) {
   5095       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
   5096       // of the same size as the compared operands. Only optimize sext(setcc())
   5097       // if this is the case.
   5098       EVT SVT = getSetCCResultType(N0VT);
   5099 
   5100       // We know that the # elements of the results is the same as the
   5101       // # elements of the compare (and the # elements of the compare result
   5102       // for that matter).  Check to see that they are the same size.  If so,
   5103       // we know that the element size of the sext'd result matches the
   5104       // element size of the compare operands.
   5105       if (VT.getSizeInBits() == SVT.getSizeInBits())
   5106         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
   5107                              N0.getOperand(1),
   5108                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
   5109 
   5110       // If the desired elements are smaller or larger than the source
   5111       // elements we can use a matching integer vector type and then
   5112       // truncate/sign extend
   5113       EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
   5114       if (SVT == MatchingVectorType) {
   5115         SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
   5116                                N0.getOperand(0), N0.getOperand(1),
   5117                                cast<CondCodeSDNode>(N0.getOperand(2))->get());
   5118         return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
   5119       }
   5120     }
   5121 
   5122     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
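             // For instance (scalar case, types purely illustrative):
             //   (sext i32 (setcc X, Y, setlt)) -> (select_cc X, Y, -1, 0, setlt)
             // i.e. all-ones when the comparison holds and zero otherwise.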
   5123     unsigned ElementWidth = VT.getScalarType().getSizeInBits();
   5124     SDValue NegOne =
   5125       DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
   5126     SDValue SCC =
   5127       SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
   5128                        NegOne, DAG.getConstant(0, VT),
   5129                        cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
   5130     if (SCC.getNode()) return SCC;
   5131 
   5132     if (!VT.isVector()) {
   5133       EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
   5134       if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) {
   5135         SDLoc DL(N);
   5136         ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
   5137         SDValue SetCC = DAG.getSetCC(DL,
   5138                                      SetCCVT,
   5139                                      N0.getOperand(0), N0.getOperand(1), CC);
   5140         EVT SelectVT = getSetCCResultType(VT);
   5141         return DAG.getSelect(DL, VT,
   5142                              DAG.getSExtOrTrunc(SetCC, DL, SelectVT),
   5143                              NegOne, DAG.getConstant(0, VT));
   5144 
   5145       }
   5146     }
   5147   }
   5148 
   5149   // fold (sext x) -> (zext x) if the sign bit is known zero.
   5150   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
   5151       DAG.SignBitIsZero(N0))
   5152     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
   5153 
   5154   return SDValue();
   5155 }
   5156 
    5157 // isTruncateOf - If N is a truncate of some other value, return true and record
   5158 // the value being truncated in Op and which of Op's bits are zero in KnownZero.
   5159 // This function computes KnownZero to avoid a duplicated call to
   5160 // computeKnownBits in the caller.
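         // For example (a sketch, not an additional case): (setcc ne X, 0) with an
         // i1 result behaves like a truncate of X to i1 whenever every bit of X
         // other than bit 0 is known zero; Op is then set to X and KnownZero holds
         // X's known-zero bits.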
   5161 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
   5162                          APInt &KnownZero) {
   5163   APInt KnownOne;
   5164   if (N->getOpcode() == ISD::TRUNCATE) {
   5165     Op = N->getOperand(0);
   5166     DAG.computeKnownBits(Op, KnownZero, KnownOne);
   5167     return true;
   5168   }
   5169 
   5170   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
   5171       cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
   5172     return false;
   5173 
   5174   SDValue Op0 = N->getOperand(0);
   5175   SDValue Op1 = N->getOperand(1);
   5176   assert(Op0.getValueType() == Op1.getValueType());
   5177 
   5178   ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
   5179   ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
   5180   if (COp0 && COp0->isNullValue())
   5181     Op = Op1;
   5182   else if (COp1 && COp1->isNullValue())
   5183     Op = Op0;
   5184   else
   5185     return false;
   5186 
   5187   DAG.computeKnownBits(Op, KnownZero, KnownOne);
   5188 
   5189   if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
   5190     return false;
   5191 
   5192   return true;
   5193 }
   5194 
   5195 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
   5196   SDValue N0 = N->getOperand(0);
   5197   EVT VT = N->getValueType(0);
   5198 
   5199   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
   5200                                               LegalOperations))
   5201     return SDValue(Res, 0);
   5202 
   5203   // fold (zext (zext x)) -> (zext x)
   5204   // fold (zext (aext x)) -> (zext x)
   5205   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
   5206     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
   5207                        N0.getOperand(0));
   5208 
   5209   // fold (zext (truncate x)) -> (zext x) or
   5210   //      (zext (truncate x)) -> (truncate x)
   5211   // This is valid when the truncated bits of x are already zero.
   5212   // FIXME: We should extend this to work for vectors too.
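           // As a sketch (widths are illustrative only): if X is an i32 whose top
           // 24 bits are known zero, then
           //   (zext i32 (truncate i8 X)) -> X
           //   (zext i64 (truncate i8 X)) -> (zext i64 X)
           //   (zext i16 (truncate i8 X)) -> (truncate i16 X)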
   5213   SDValue Op;
   5214   APInt KnownZero;
   5215   if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
   5216     APInt TruncatedBits =
   5217       (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
   5218       APInt(Op.getValueSizeInBits(), 0) :
   5219       APInt::getBitsSet(Op.getValueSizeInBits(),
   5220                         N0.getValueSizeInBits(),
   5221                         std::min(Op.getValueSizeInBits(),
   5222                                  VT.getSizeInBits()));
   5223     if (TruncatedBits == (KnownZero & TruncatedBits)) {
   5224       if (VT.bitsGT(Op.getValueType()))
   5225         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
   5226       if (VT.bitsLT(Op.getValueType()))
   5227         return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
   5228 
   5229       return Op;
   5230     }
   5231   }
   5232 
   5233   // fold (zext (truncate (load x))) -> (zext (smaller load x))
   5234   // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
   5235   if (N0.getOpcode() == ISD::TRUNCATE) {
   5236     SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
   5237     if (NarrowLoad.getNode()) {
   5238       SDNode* oye = N0.getNode()->getOperand(0).getNode();
   5239       if (NarrowLoad.getNode() != N0.getNode()) {
   5240         CombineTo(N0.getNode(), NarrowLoad);
   5241         // CombineTo deleted the truncate, if needed, but not what's under it.
   5242         AddToWorkList(oye);
   5243       }
   5244       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   5245     }
   5246   }
   5247 
   5248   // fold (zext (truncate x)) -> (and x, mask)
   5249   if (N0.getOpcode() == ISD::TRUNCATE &&
   5250       (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
   5251 
   5252     // fold (zext (truncate (load x))) -> (zext (smaller load x))
   5253     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
   5254     SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
   5255     if (NarrowLoad.getNode()) {
   5256       SDNode* oye = N0.getNode()->getOperand(0).getNode();
   5257       if (NarrowLoad.getNode() != N0.getNode()) {
   5258         CombineTo(N0.getNode(), NarrowLoad);
   5259         // CombineTo deleted the truncate, if needed, but not what's under it.
   5260         AddToWorkList(oye);
   5261       }
   5262       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   5263     }
   5264 
   5265     SDValue Op = N0.getOperand(0);
   5266     if (Op.getValueType().bitsLT(VT)) {
   5267       Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
   5268       AddToWorkList(Op.getNode());
   5269     } else if (Op.getValueType().bitsGT(VT)) {
   5270       Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
   5271       AddToWorkList(Op.getNode());
   5272     }
   5273     return DAG.getZeroExtendInReg(Op, SDLoc(N),
   5274                                   N0.getValueType().getScalarType());
   5275   }
   5276 
   5277   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
   5278   // if either of the casts is not free.
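           // A hedged example (i32/i64 are for illustration only): with X : i64,
           //   (zext i64 (and (truncate i32 X), 255)) -> (and X, 255)
           // where the mask constant is zero-extended to the destination type.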
   5279   if (N0.getOpcode() == ISD::AND &&
   5280       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
   5281       N0.getOperand(1).getOpcode() == ISD::Constant &&
   5282       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
   5283                            N0.getValueType()) ||
   5284        !TLI.isZExtFree(N0.getValueType(), VT))) {
   5285     SDValue X = N0.getOperand(0).getOperand(0);
   5286     if (X.getValueType().bitsLT(VT)) {
   5287       X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
   5288     } else if (X.getValueType().bitsGT(VT)) {
   5289       X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
   5290     }
   5291     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
   5292     Mask = Mask.zext(VT.getSizeInBits());
   5293     return DAG.getNode(ISD::AND, SDLoc(N), VT,
   5294                        X, DAG.getConstant(Mask, VT));
   5295   }
   5296 
   5297   // fold (zext (load x)) -> (zext (truncate (zextload x)))
   5298   // None of the supported targets knows how to perform load and vector_zext
   5299   // on vectors in one instruction.  We only perform this transformation on
   5300   // scalars.
   5301   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
   5302       ISD::isUNINDEXEDLoad(N0.getNode()) &&
   5303       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
   5304        TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
   5305     bool DoXform = true;
   5306     SmallVector<SDNode*, 4> SetCCs;
   5307     if (!N0.hasOneUse())
   5308       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
   5309     if (DoXform) {
   5310       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   5311       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
   5312                                        LN0->getChain(),
   5313                                        LN0->getBasePtr(), N0.getValueType(),
   5314                                        LN0->getMemOperand());
   5315       CombineTo(N, ExtLoad);
   5316       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
   5317                                   N0.getValueType(), ExtLoad);
   5318       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
   5319 
   5320       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
   5321                       ISD::ZERO_EXTEND);
   5322       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   5323     }
   5324   }
   5325 
   5326   // fold (zext (and/or/xor (load x), cst)) ->
   5327   //      (and/or/xor (zextload x), (zext cst))
   5328   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
   5329        N0.getOpcode() == ISD::XOR) &&
   5330       isa<LoadSDNode>(N0.getOperand(0)) &&
   5331       N0.getOperand(1).getOpcode() == ISD::Constant &&
   5332       TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
   5333       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
   5334     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
   5335     if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
   5336       bool DoXform = true;
   5337       SmallVector<SDNode*, 4> SetCCs;
   5338       if (!N0.hasOneUse())
   5339         DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
   5340                                           SetCCs, TLI);
   5341       if (DoXform) {
   5342         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
   5343                                          LN0->getChain(), LN0->getBasePtr(),
   5344                                          LN0->getMemoryVT(),
   5345                                          LN0->getMemOperand());
   5346         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
   5347         Mask = Mask.zext(VT.getSizeInBits());
   5348         SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
   5349                                   ExtLoad, DAG.getConstant(Mask, VT));
   5350         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
   5351                                     SDLoc(N0.getOperand(0)),
   5352                                     N0.getOperand(0).getValueType(), ExtLoad);
   5353         CombineTo(N, And);
   5354         CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
   5355         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
   5356                         ISD::ZERO_EXTEND);
   5357         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   5358       }
   5359     }
   5360   }
   5361 
   5362   // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
   5363   // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
   5364   if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
   5365       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
   5366     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   5367     EVT MemVT = LN0->getMemoryVT();
   5368     if ((!LegalOperations && !LN0->isVolatile()) ||
   5369         TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
   5370       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
   5371                                        LN0->getChain(),
   5372                                        LN0->getBasePtr(), MemVT,
   5373                                        LN0->getMemOperand());
   5374       CombineTo(N, ExtLoad);
   5375       CombineTo(N0.getNode(),
   5376                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
   5377                             ExtLoad),
   5378                 ExtLoad.getValue(1));
   5379       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   5380     }
   5381   }
   5382 
   5383   if (N0.getOpcode() == ISD::SETCC) {
   5384     if (!LegalOperations && VT.isVector() &&
   5385         N0.getValueType().getVectorElementType() == MVT::i1) {
   5386       EVT N0VT = N0.getOperand(0).getValueType();
   5387       if (getSetCCResultType(N0VT) == N0.getValueType())
   5388         return SDValue();
   5389 
    5390       // zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
   5391       // Only do this before legalize for now.
   5392       EVT EltVT = VT.getVectorElementType();
   5393       SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
   5394                                     DAG.getConstant(1, EltVT));
   5395       if (VT.getSizeInBits() == N0VT.getSizeInBits())
   5396         // We know that the # elements of the results is the same as the
   5397         // # elements of the compare (and the # elements of the compare result
   5398         // for that matter).  Check to see that they are the same size.  If so,
    5399         // we know that the element size of the extended result matches the
   5400         // element size of the compare operands.
   5401         return DAG.getNode(ISD::AND, SDLoc(N), VT,
   5402                            DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
   5403                                          N0.getOperand(1),
   5404                                  cast<CondCodeSDNode>(N0.getOperand(2))->get()),
   5405                            DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
   5406                                        OneOps));
   5407 
   5408       // If the desired elements are smaller or larger than the source
   5409       // elements we can use a matching integer vector type and then
   5410       // truncate/sign extend
   5411       EVT MatchingElementType =
   5412         EVT::getIntegerVT(*DAG.getContext(),
   5413                           N0VT.getScalarType().getSizeInBits());
   5414       EVT MatchingVectorType =
   5415         EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
   5416                          N0VT.getVectorNumElements());
   5417       SDValue VsetCC =
   5418         DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
   5419                       N0.getOperand(1),
   5420                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
   5421       return DAG.getNode(ISD::AND, SDLoc(N), VT,
   5422                          DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT),
   5423                          DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, OneOps));
   5424     }
   5425 
   5426     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
   5427     SDValue SCC =
   5428       SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
   5429                        DAG.getConstant(1, VT), DAG.getConstant(0, VT),
   5430                        cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
   5431     if (SCC.getNode()) return SCC;
   5432   }
   5433 
    5434   // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
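           // A sketch with invented widths: (zext i32 X:i8) has 24 known-zero high
           // bits, so a left shift by a constant c <= 24 cannot shift out set bits;
           //   (zext i64 (shl (zext i32 X), c)) -> (shl (zext i64 (zext i32 X)), c)
           // and the nested zexts are merged by a later combine.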
   5435   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
   5436       isa<ConstantSDNode>(N0.getOperand(1)) &&
   5437       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
   5438       N0.hasOneUse()) {
   5439     SDValue ShAmt = N0.getOperand(1);
   5440     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
   5441     if (N0.getOpcode() == ISD::SHL) {
   5442       SDValue InnerZExt = N0.getOperand(0);
   5443       // If the original shl may be shifting out bits, do not perform this
   5444       // transformation.
   5445       unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
   5446         InnerZExt.getOperand(0).getValueType().getSizeInBits();
   5447       if (ShAmtVal > KnownZeroBits)
   5448         return SDValue();
   5449     }
   5450 
   5451     SDLoc DL(N);
   5452 
   5453     // Ensure that the shift amount is wide enough for the shifted value.
   5454     if (VT.getSizeInBits() >= 256)
   5455       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
   5456 
   5457     return DAG.getNode(N0.getOpcode(), DL, VT,
   5458                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
   5459                        ShAmt);
   5460   }
   5461 
   5462   return SDValue();
   5463 }
   5464 
   5465 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
   5466   SDValue N0 = N->getOperand(0);
   5467   EVT VT = N->getValueType(0);
   5468 
   5469   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
   5470                                               LegalOperations))
   5471     return SDValue(Res, 0);
   5472 
   5473   // fold (aext (aext x)) -> (aext x)
   5474   // fold (aext (zext x)) -> (zext x)
   5475   // fold (aext (sext x)) -> (sext x)
   5476   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
   5477       N0.getOpcode() == ISD::ZERO_EXTEND ||
   5478       N0.getOpcode() == ISD::SIGN_EXTEND)
   5479     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
   5480 
   5481   // fold (aext (truncate (load x))) -> (aext (smaller load x))
   5482   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
   5483   if (N0.getOpcode() == ISD::TRUNCATE) {
   5484     SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
   5485     if (NarrowLoad.getNode()) {
   5486       SDNode* oye = N0.getNode()->getOperand(0).getNode();
   5487       if (NarrowLoad.getNode() != N0.getNode()) {
   5488         CombineTo(N0.getNode(), NarrowLoad);
   5489         // CombineTo deleted the truncate, if needed, but not what's under it.
   5490         AddToWorkList(oye);
   5491       }
   5492       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   5493     }
   5494   }
   5495 
   5496   // fold (aext (truncate x))
   5497   if (N0.getOpcode() == ISD::TRUNCATE) {
   5498     SDValue TruncOp = N0.getOperand(0);
   5499     if (TruncOp.getValueType() == VT)
   5500       return TruncOp; // x iff x size == zext size.
   5501     if (TruncOp.getValueType().bitsGT(VT))
   5502       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
   5503     return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
   5504   }
   5505 
   5506   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
   5507   // if the trunc is not free.
   5508   if (N0.getOpcode() == ISD::AND &&
   5509       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
   5510       N0.getOperand(1).getOpcode() == ISD::Constant &&
   5511       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
   5512                           N0.getValueType())) {
   5513     SDValue X = N0.getOperand(0).getOperand(0);
   5514     if (X.getValueType().bitsLT(VT)) {
   5515       X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
   5516     } else if (X.getValueType().bitsGT(VT)) {
   5517       X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
   5518     }
   5519     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
   5520     Mask = Mask.zext(VT.getSizeInBits());
   5521     return DAG.getNode(ISD::AND, SDLoc(N), VT,
   5522                        X, DAG.getConstant(Mask, VT));
   5523   }
   5524 
   5525   // fold (aext (load x)) -> (aext (truncate (extload x)))
   5526   // None of the supported targets knows how to perform load and any_ext
   5527   // on vectors in one instruction.  We only perform this transformation on
   5528   // scalars.
   5529   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
   5530       ISD::isUNINDEXEDLoad(N0.getNode()) &&
   5531       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
   5532        TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
   5533     bool DoXform = true;
   5534     SmallVector<SDNode*, 4> SetCCs;
   5535     if (!N0.hasOneUse())
   5536       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
   5537     if (DoXform) {
   5538       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   5539       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
   5540                                        LN0->getChain(),
   5541                                        LN0->getBasePtr(), N0.getValueType(),
   5542                                        LN0->getMemOperand());
   5543       CombineTo(N, ExtLoad);
   5544       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
   5545                                   N0.getValueType(), ExtLoad);
   5546       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
   5547       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
   5548                       ISD::ANY_EXTEND);
   5549       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   5550     }
   5551   }
   5552 
   5553   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
   5554   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
   5555   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
   5556   if (N0.getOpcode() == ISD::LOAD &&
   5557       !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
   5558       N0.hasOneUse()) {
   5559     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   5560     ISD::LoadExtType ExtType = LN0->getExtensionType();
   5561     EVT MemVT = LN0->getMemoryVT();
   5562     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, MemVT)) {
   5563       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
   5564                                        VT, LN0->getChain(), LN0->getBasePtr(),
   5565                                        MemVT, LN0->getMemOperand());
   5566       CombineTo(N, ExtLoad);
   5567       CombineTo(N0.getNode(),
   5568                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
   5569                             N0.getValueType(), ExtLoad),
   5570                 ExtLoad.getValue(1));
   5571       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   5572     }
   5573   }
   5574 
   5575   if (N0.getOpcode() == ISD::SETCC) {
   5576     // For vectors:
   5577     // aext(setcc) -> vsetcc
   5578     // aext(setcc) -> truncate(vsetcc)
   5579     // aext(setcc) -> aext(vsetcc)
   5580     // Only do this before legalize for now.
   5581     if (VT.isVector() && !LegalOperations) {
   5582       EVT N0VT = N0.getOperand(0).getValueType();
   5583         // We know that the # elements of the results is the same as the
   5584         // # elements of the compare (and the # elements of the compare result
   5585         // for that matter).  Check to see that they are the same size.  If so,
    5586         // we know that the element size of the extended result matches the
   5587         // element size of the compare operands.
   5588       if (VT.getSizeInBits() == N0VT.getSizeInBits())
   5589         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
   5590                              N0.getOperand(1),
   5591                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
   5592       // If the desired elements are smaller or larger than the source
   5593       // elements we can use a matching integer vector type and then
   5594       // truncate/any extend
   5595       else {
   5596         EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
   5597         SDValue VsetCC =
   5598           DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
   5599                         N0.getOperand(1),
   5600                         cast<CondCodeSDNode>(N0.getOperand(2))->get());
   5601         return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
   5602       }
   5603     }
   5604 
   5605     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
   5606     SDValue SCC =
   5607       SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
   5608                        DAG.getConstant(1, VT), DAG.getConstant(0, VT),
   5609                        cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
   5610     if (SCC.getNode())
   5611       return SCC;
   5612   }
   5613 
   5614   return SDValue();
   5615 }
   5616 
   5617 /// GetDemandedBits - See if the specified operand can be simplified with the
   5618 /// knowledge that only the bits specified by Mask are used.  If so, return the
   5619 /// simpler operand, otherwise return a null SDValue.
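         /// For example (widths purely illustrative): with Mask = 0xFF only the low
         /// byte is demanded, so (or X, (shl Y, 8)) simplifies to X because the
         /// shifted operand cannot contribute any demanded bits.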
   5620 SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
   5621   switch (V.getOpcode()) {
   5622   default: break;
   5623   case ISD::Constant: {
   5624     const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
   5625     assert(CV && "Const value should be ConstSDNode.");
   5626     const APInt &CVal = CV->getAPIntValue();
   5627     APInt NewVal = CVal & Mask;
   5628     if (NewVal != CVal)
   5629       return DAG.getConstant(NewVal, V.getValueType());
   5630     break;
   5631   }
   5632   case ISD::OR:
   5633   case ISD::XOR:
   5634     // If the LHS or RHS don't contribute bits to the or, drop them.
   5635     if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
   5636       return V.getOperand(1);
   5637     if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
   5638       return V.getOperand(0);
   5639     break;
   5640   case ISD::SRL:
   5641     // Only look at single-use SRLs.
   5642     if (!V.getNode()->hasOneUse())
   5643       break;
   5644     if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
   5645       // See if we can recursively simplify the LHS.
   5646       unsigned Amt = RHSC->getZExtValue();
   5647 
   5648       // Watch out for shift count overflow though.
   5649       if (Amt >= Mask.getBitWidth()) break;
   5650       APInt NewMask = Mask << Amt;
   5651       SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
   5652       if (SimplifyLHS.getNode())
   5653         return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
   5654                            SimplifyLHS, V.getOperand(1));
   5655     }
   5656   }
   5657   return SDValue();
   5658 }
   5659 
    5660 /// ReduceLoadWidth - If the result of a wider load is shifted right by N
    5661 /// bits and then truncated to a narrower type, where N is a multiple of
    5662 /// the number of bits of the narrower type, transform it into a narrower
    5663 /// load from address + N / (bits of the new type). If the result is to be
    5664 /// extended, also fold the extension to form an extending load.
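         /// As a hedged illustration (offsets assume a little-endian target):
         ///   (truncate i8 (srl (load i32 p), 16))
         /// selects the byte stored at p + 2 and so can be rewritten as an i8 load
         /// from that address.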
   5665 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
   5666   unsigned Opc = N->getOpcode();
   5667 
   5668   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
   5669   SDValue N0 = N->getOperand(0);
   5670   EVT VT = N->getValueType(0);
   5671   EVT ExtVT = VT;
   5672 
   5673   // This transformation isn't valid for vector loads.
   5674   if (VT.isVector())
   5675     return SDValue();
   5676 
    5677   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT and then
    5678   // sign-extending back to VT.
   5679   if (Opc == ISD::SIGN_EXTEND_INREG) {
   5680     ExtType = ISD::SEXTLOAD;
   5681     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
   5682   } else if (Opc == ISD::SRL) {
   5683     // Another special-case: SRL is basically zero-extending a narrower value.
   5684     ExtType = ISD::ZEXTLOAD;
   5685     N0 = SDValue(N, 0);
   5686     ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   5687     if (!N01) return SDValue();
   5688     ExtVT = EVT::getIntegerVT(*DAG.getContext(),
   5689                               VT.getSizeInBits() - N01->getZExtValue());
   5690   }
   5691   if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT))
   5692     return SDValue();
   5693 
   5694   unsigned EVTBits = ExtVT.getSizeInBits();
   5695 
   5696   // Do not generate loads of non-round integer types since these can
   5697   // be expensive (and would be wrong if the type is not byte sized).
   5698   if (!ExtVT.isRound())
   5699     return SDValue();
   5700 
   5701   unsigned ShAmt = 0;
   5702   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
   5703     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
   5704       ShAmt = N01->getZExtValue();
    5705       // Is the shift amount a multiple of the size of ExtVT?
   5706       if ((ShAmt & (EVTBits-1)) == 0) {
   5707         N0 = N0.getOperand(0);
    5708         // Is the load width a multiple of the size of ExtVT?
   5709         if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
   5710           return SDValue();
   5711       }
   5712 
   5713       // At this point, we must have a load or else we can't do the transform.
   5714       if (!isa<LoadSDNode>(N0)) return SDValue();
   5715 
   5716       // Because a SRL must be assumed to *need* to zero-extend the high bits
   5717       // (as opposed to anyext the high bits), we can't combine the zextload
   5718       // lowering of SRL and an sextload.
   5719       if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
   5720         return SDValue();
   5721 
   5722       // If the shift amount is larger than the input type then we're not
   5723       // accessing any of the loaded bytes.  If the load was a zextload/extload
   5724       // then the result of the shift+trunc is zero/undef (handled elsewhere).
   5725       if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
   5726         return SDValue();
   5727     }
   5728   }
   5729 
   5730   // If the load is shifted left (and the result isn't shifted back right),
   5731   // we can fold the truncate through the shift.
   5732   unsigned ShLeftAmt = 0;
   5733   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
   5734       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
   5735     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
   5736       ShLeftAmt = N01->getZExtValue();
   5737       N0 = N0.getOperand(0);
   5738     }
   5739   }
   5740 
   5741   // If we haven't found a load, we can't narrow it.  Don't transform one with
    5742   // multiple uses; this would require adding a new load.
   5743   if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
   5744     return SDValue();
   5745 
   5746   // Don't change the width of a volatile load.
   5747   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   5748   if (LN0->isVolatile())
   5749     return SDValue();
   5750 
   5751   // Verify that we are actually reducing a load width here.
   5752   if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
   5753     return SDValue();
   5754 
   5755   // For the transform to be legal, the load must produce only two values
   5756   // (the value loaded and the chain).  Don't transform a pre-increment
   5757   // load, for example, which produces an extra value.  Otherwise the
   5758   // transformation is not equivalent, and the downstream logic to replace
   5759   // uses gets things wrong.
   5760   if (LN0->getNumValues() > 2)
   5761     return SDValue();
   5762 
   5763   // If the load that we're shrinking is an extload and we're not just
   5764   // discarding the extension we can't simply shrink the load. Bail.
   5765   // TODO: It would be possible to merge the extensions in some cases.
   5766   if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
   5767       LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
   5768     return SDValue();
   5769 
   5770   EVT PtrType = N0.getOperand(1).getValueType();
   5771 
   5772   if (PtrType == MVT::Untyped || PtrType.isExtended())
   5773     // It's not possible to generate a constant of extended or untyped type.
   5774     return SDValue();
   5775 
   5776   // For big endian targets, we need to adjust the offset to the pointer to
    5777   // For big endian targets, we need to adjust the offset added to the
    5778   // pointer so that we load the correct bytes.
   5779     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
   5780     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
   5781     ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
   5782   }
   5783 
   5784   uint64_t PtrOff = ShAmt / 8;
   5785   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
   5786   SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0),
   5787                                PtrType, LN0->getBasePtr(),
   5788                                DAG.getConstant(PtrOff, PtrType));
   5789   AddToWorkList(NewPtr.getNode());
   5790 
   5791   SDValue Load;
   5792   if (ExtType == ISD::NON_EXTLOAD)
   5793     Load =  DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
   5794                         LN0->getPointerInfo().getWithOffset(PtrOff),
   5795                         LN0->isVolatile(), LN0->isNonTemporal(),
   5796                         LN0->isInvariant(), NewAlign, LN0->getTBAAInfo());
   5797   else
   5798     Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr,
   5799                           LN0->getPointerInfo().getWithOffset(PtrOff),
   5800                           ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
   5801                           NewAlign, LN0->getTBAAInfo());
   5802 
   5803   // Replace the old load's chain with the new load's chain.
   5804   WorkListRemover DeadNodes(*this);
   5805   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
   5806 
   5807   // Shift the result left, if we've swallowed a left shift.
   5808   SDValue Result = Load;
   5809   if (ShLeftAmt != 0) {
   5810     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
   5811     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
   5812       ShImmTy = VT;
   5813     // If the shift amount is as large as the result size (but, presumably,
   5814     // no larger than the source) then the useful bits of the result are
   5815     // zero; we can't simply return the shortened shift, because the result
   5816     // of that operation is undefined.
   5817     if (ShLeftAmt >= VT.getSizeInBits())
   5818       Result = DAG.getConstant(0, VT);
   5819     else
   5820       Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT,
   5821                           Result, DAG.getConstant(ShLeftAmt, ShImmTy));
   5822   }
   5823 
   5824   // Return the new loaded value.
   5825   return Result;
   5826 }
   5827 
   5828 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
   5829   SDValue N0 = N->getOperand(0);
   5830   SDValue N1 = N->getOperand(1);
   5831   EVT VT = N->getValueType(0);
   5832   EVT EVT = cast<VTSDNode>(N1)->getVT();
   5833   unsigned VTBits = VT.getScalarType().getSizeInBits();
   5834   unsigned EVTBits = EVT.getScalarType().getSizeInBits();
   5835 
   5836   // fold (sext_in_reg c1) -> c1
   5837   if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
   5838     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
   5839 
   5840   // If the input is already sign extended, just drop the extension.
   5841   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
   5842     return N0;
   5843 
   5844   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
   5845   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
   5846       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
   5847     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
   5848                        N0.getOperand(0), N1);
   5849 
   5850   // fold (sext_in_reg (sext x)) -> (sext x)
   5851   // fold (sext_in_reg (aext x)) -> (sext x)
   5852   // if x is small enough.
   5853   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
   5854     SDValue N00 = N0.getOperand(0);
   5855     if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
   5856         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
   5857       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
   5858   }
   5859 
   5860   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
   5861   if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
   5862     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT);
   5863 
   5864   // fold operands of sext_in_reg based on knowledge that the top bits are not
   5865   // demanded.
   5866   if (SimplifyDemandedBits(SDValue(N, 0)))
   5867     return SDValue(N, 0);
   5868 
   5869   // fold (sext_in_reg (load x)) -> (smaller sextload x)
   5870   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
   5871   SDValue NarrowLoad = ReduceLoadWidth(N);
   5872   if (NarrowLoad.getNode())
   5873     return NarrowLoad;
   5874 
   5875   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
   5876   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
   5877   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
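           // For instance, with an i32 X (values are illustrative):
           //   (sext_in_reg (srl X, 24), i8) -> (sra X, 24) unconditionally, while
           //   (sext_in_reg (srl X, 23), i8) -> (sra X, 23) only if X is known to
           //   have at least two sign bits.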
   5878   if (N0.getOpcode() == ISD::SRL) {
   5879     if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
   5880       if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
   5881         // We can turn this into an SRA iff the input to the SRL is already sign
   5882         // extended enough.
   5883         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
   5884         if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
   5885           return DAG.getNode(ISD::SRA, SDLoc(N), VT,
   5886                              N0.getOperand(0), N0.getOperand(1));
   5887       }
   5888   }
   5889 
   5890   // fold (sext_inreg (extload x)) -> (sextload x)
   5891   if (ISD::isEXTLoad(N0.getNode()) &&
   5892       ISD::isUNINDEXEDLoad(N0.getNode()) &&
   5893       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
   5894       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
   5895        TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
   5896     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   5897     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
   5898                                      LN0->getChain(),
   5899                                      LN0->getBasePtr(), EVT,
   5900                                      LN0->getMemOperand());
   5901     CombineTo(N, ExtLoad);
   5902     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
   5903     AddToWorkList(ExtLoad.getNode());
   5904     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   5905   }
   5906   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
   5907   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
   5908       N0.hasOneUse() &&
   5909       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
   5910       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
   5911        TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
   5912     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   5913     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
   5914                                      LN0->getChain(),
   5915                                      LN0->getBasePtr(), EVT,
   5916                                      LN0->getMemOperand());
   5917     CombineTo(N, ExtLoad);
   5918     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
   5919     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   5920   }
   5921 
   5922   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
   5923   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
   5924     SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
   5925                                        N0.getOperand(1), false);
   5926     if (BSwap.getNode())
   5927       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
   5928                          BSwap, N1);
   5929   }
   5930 
   5931   // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs
   5932   // into a build_vector.
   5933   if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
   5934     SmallVector<SDValue, 8> Elts;
   5935     unsigned NumElts = N0->getNumOperands();
   5936     unsigned ShAmt = VTBits - EVTBits;
   5937 
   5938     for (unsigned i = 0; i != NumElts; ++i) {
   5939       SDValue Op = N0->getOperand(i);
   5940       if (Op->getOpcode() == ISD::UNDEF) {
   5941         Elts.push_back(Op);
   5942         continue;
   5943       }
   5944 
   5945       ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
    5946       const APInt C(VTBits, CurrentND->getAPIntValue().getZExtValue());
   5947       Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
   5948                                      Op.getValueType()));
   5949     }
   5950 
   5951     return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts);
   5952   }
   5953 
   5954   return SDValue();
   5955 }
   5956 
   5957 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   5958   SDValue N0 = N->getOperand(0);
   5959   EVT VT = N->getValueType(0);
   5960   bool isLE = TLI.isLittleEndian();
   5961 
   5962   // noop truncate
   5963   if (N0.getValueType() == N->getValueType(0))
   5964     return N0;
   5965   // fold (truncate c1) -> c1
   5966   if (isa<ConstantSDNode>(N0))
   5967     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
   5968   // fold (truncate (truncate x)) -> (truncate x)
   5969   if (N0.getOpcode() == ISD::TRUNCATE)
   5970     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
   5971   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
   5972   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
   5973       N0.getOpcode() == ISD::SIGN_EXTEND ||
   5974       N0.getOpcode() == ISD::ANY_EXTEND) {
   5975     if (N0.getOperand(0).getValueType().bitsLT(VT))
   5976       // if the source is smaller than the dest, we still need an extend
   5977       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
   5978                          N0.getOperand(0));
   5979     if (N0.getOperand(0).getValueType().bitsGT(VT))
    5980       // if the source is larger than the dest, then we just need the truncate
   5981       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
   5982     // if the source and dest are the same type, we can drop both the extend
   5983     // and the truncate.
   5984     return N0.getOperand(0);
   5985   }
   5986 
   5987   // Fold extract-and-trunc into a narrow extract. For example:
   5988   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
   5989   //   i32 y = TRUNCATE(i64 x)
   5990   //        -- becomes --
   5991   //   v16i8 b = BITCAST (v2i64 val)
   5992   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
   5993   //
   5994   // Note: We only run this optimization after type legalization (which often
   5995   // creates this pattern) and before operation legalization after which
   5996   // we need to be more careful about the vector instructions that we generate.
   5997   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
   5998       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
   5999 
   6000     EVT VecTy = N0.getOperand(0).getValueType();
   6001     EVT ExTy = N0.getValueType();
   6002     EVT TrTy = N->getValueType(0);
   6003 
   6004     unsigned NumElem = VecTy.getVectorNumElements();
   6005     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
   6006 
   6007     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
   6008     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
   6009 
   6010     SDValue EltNo = N0->getOperand(1);
   6011     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
   6012       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
   6013       EVT IndexTy = TLI.getVectorIdxTy();
   6014       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
   6015 
   6016       SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N),
   6017                               NVT, N0.getOperand(0));
   6018 
   6019       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
   6020                          SDLoc(N), TrTy, V,
   6021                          DAG.getConstant(Index, IndexTy));
   6022     }
   6023   }
   6024 
   6025   // Fold a series of buildvector, bitcast, and truncate if possible.
   6026   // For example fold
   6027   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
   6028   //   (2xi32 (buildvector x, y)).
   6029   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
   6030       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
   6031       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
   6032       N0.getOperand(0).hasOneUse()) {
   6033 
   6034     SDValue BuildVect = N0.getOperand(0);
   6035     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
   6036     EVT TruncVecEltTy = VT.getVectorElementType();
   6037 
   6038     // Check that the element types match.
   6039     if (BuildVectEltTy == TruncVecEltTy) {
   6040       // Now we only need to compute the offset of the truncated elements.
   6041       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
   6042       unsigned TruncVecNumElts = VT.getVectorNumElements();
   6043       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
   6044 
   6045       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
   6046              "Invalid number of elements");
   6047 
   6048       SmallVector<SDValue, 8> Opnds;
   6049       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
   6050         Opnds.push_back(BuildVect.getOperand(i));
   6051 
   6052       return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
   6053     }
   6054   }
   6055 
   6056   // See if we can simplify the input to this truncate through knowledge that
   6057   // only the low bits are being used.
   6058   // For example "trunc (or (shl x, 8), y)" -> trunc y
   6059   // Currently we only perform this optimization on scalars because vectors
   6060   // may have different active low bits.
   6061   if (!VT.isVector()) {
   6062     SDValue Shorter =
   6063       GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
   6064                                                VT.getSizeInBits()));
   6065     if (Shorter.getNode())
   6066       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
   6067   }
   6068   // fold (truncate (load x)) -> (smaller load x)
   6069   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
   6070   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
   6071     SDValue Reduced = ReduceLoadWidth(N);
   6072     if (Reduced.getNode())
   6073       return Reduced;
   6074     // Handle the case where the load remains an extending load even
   6075     // after truncation.
   6076     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
   6077       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   6078       if (!LN0->isVolatile() &&
   6079           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
   6080         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
   6081                                          VT, LN0->getChain(), LN0->getBasePtr(),
   6082                                          LN0->getMemoryVT(),
   6083                                          LN0->getMemOperand());
   6084         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
   6085         return NewLoad;
   6086       }
   6087     }
   6088   }
   6089   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
   6090   // where ... are all 'undef'.
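          // For example (illustrative):
          //   (v4i16 (trunc (v4i32 (concat_vectors (v2i32 x), (v2i32 undef)))))
          //     -> (concat_vectors (v2i16 (trunc x)), (v2i16 undef))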
   6091   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
   6092     SmallVector<EVT, 8> VTs;
   6093     SDValue V;
   6094     unsigned Idx = 0;
   6095     unsigned NumDefs = 0;
   6096 
   6097     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
   6098       SDValue X = N0.getOperand(i);
   6099       if (X.getOpcode() != ISD::UNDEF) {
   6100         V = X;
   6101         Idx = i;
   6102         NumDefs++;
   6103       }
   6104       // Stop if more than one member is non-undef.
   6105       if (NumDefs > 1)
   6106         break;
   6107       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
   6108                                      VT.getVectorElementType(),
   6109                                      X.getValueType().getVectorNumElements()));
   6110     }
   6111 
   6112     if (NumDefs == 0)
   6113       return DAG.getUNDEF(VT);
   6114 
   6115     if (NumDefs == 1) {
   6116       assert(V.getNode() && "The single defined operand is empty!");
   6117       SmallVector<SDValue, 8> Opnds;
   6118       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
   6119         if (i != Idx) {
   6120           Opnds.push_back(DAG.getUNDEF(VTs[i]));
   6121           continue;
   6122         }
   6123         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
   6124         AddToWorkList(NV.getNode());
   6125         Opnds.push_back(NV);
   6126       }
   6127       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
   6128     }
   6129   }
   6130 
   6131   // Simplify the operands using demanded-bits information.
   6132   if (!VT.isVector() &&
   6133       SimplifyDemandedBits(SDValue(N, 0)))
   6134     return SDValue(N, 0);
   6135 
   6136   return SDValue();
   6137 }
   6138 
   6139 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
   6140   SDValue Elt = N->getOperand(i);
   6141   if (Elt.getOpcode() != ISD::MERGE_VALUES)
   6142     return Elt.getNode();
   6143   return Elt.getOperand(Elt.getResNo()).getNode();
   6144 }
   6145 
   6146 /// CombineConsecutiveLoads - build_pair (load, load) -> load
   6147 /// if load locations are consecutive.
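        /// For example (illustrative), a BUILD_PAIR of two adjacent, non-extending,
        /// non-volatile i32 loads can become a single i64 load from the first address,
        /// provided the wider type's ABI alignment is no stricter than the original
        /// load's alignment.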
   6148 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
   6149   assert(N->getOpcode() == ISD::BUILD_PAIR);
   6150 
   6151   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
   6152   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
   6153   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
   6154       LD1->getAddressSpace() != LD2->getAddressSpace())
   6155     return SDValue();
   6156   EVT LD1VT = LD1->getValueType(0);
   6157 
   6158   if (ISD::isNON_EXTLoad(LD2) &&
   6159       LD2->hasOneUse() &&
   6160       // If both are volatile this would reduce the number of volatile loads.
   6161       // If one is volatile it might be ok, but be conservative and bail out.
   6162       !LD1->isVolatile() &&
   6163       !LD2->isVolatile() &&
   6164       DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
   6165     unsigned Align = LD1->getAlignment();
   6166     unsigned NewAlign = TLI.getDataLayout()->
   6167       getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
   6168 
   6169     if (NewAlign <= Align &&
   6170         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
   6171       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(),
   6172                          LD1->getBasePtr(), LD1->getPointerInfo(),
   6173                          false, false, false, Align);
   6174   }
   6175 
   6176   return SDValue();
   6177 }
   6178 
   6179 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
   6180   SDValue N0 = N->getOperand(0);
   6181   EVT VT = N->getValueType(0);
   6182 
   6183   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
   6184   // Only do this before legalize, since afterward the target may be depending
   6185   // on the bitconvert.
   6186   // First check to see if this is all constant.
   6187   if (!LegalTypes &&
   6188       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
   6189       VT.isVector()) {
   6190     bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
   6191 
   6192     EVT DestEltVT = N->getValueType(0).getVectorElementType();
   6193     assert(!DestEltVT.isVector() &&
   6194            "Element type of vector ValueType must not be vector!");
   6195     if (isSimple)
   6196       return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
   6197   }
   6198 
   6199   // If the input is a constant, let getNode fold it.
   6200   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
   6201     SDValue Res = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0);
   6202     if (Res.getNode() != N) {
   6203       if (!LegalOperations ||
   6204           TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
   6205         return Res;
   6206 
   6207       // Folding it resulted in an illegal node, and it's too late to
   6208       // do that. Clean up the old node and forego the transformation.
   6209       // Ideally this won't happen very often, because instcombine
   6210       // and the earlier dagcombine runs (where illegal nodes are
   6211       // permitted) should have folded most of them already.
   6212       DAG.DeleteNode(Res.getNode());
   6213     }
   6214   }
   6215 
   6216   // (conv (conv x, t1), t2) -> (conv x, t2)
   6217   if (N0.getOpcode() == ISD::BITCAST)
   6218     return DAG.getNode(ISD::BITCAST, SDLoc(N), VT,
   6219                        N0.getOperand(0));
   6220 
   6221   // fold (conv (load x)) -> (load (conv*)x)
   6222   // If the resultant load doesn't need a higher alignment than the original!
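          // For example (illustrative): (i64 (bitcast (f64 (load p)))) can become
          // (i64 (load p)), provided the alignment, legality, and part-ordering
          // checks below are satisfied.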
   6223   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
   6224       // Do not change the width of a volatile load.
   6225       !cast<LoadSDNode>(N0)->isVolatile() &&
   6226       // Do not remove the cast if the types differ in endian layout.
   6227       TLI.hasBigEndianPartOrdering(N0.getValueType()) ==
   6228       TLI.hasBigEndianPartOrdering(VT) &&
   6229       (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
   6230       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
   6231     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   6232     unsigned Align = TLI.getDataLayout()->
   6233       getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
   6234     unsigned OrigAlign = LN0->getAlignment();
   6235 
   6236     if (Align <= OrigAlign) {
   6237       SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(),
   6238                                  LN0->getBasePtr(), LN0->getPointerInfo(),
   6239                                  LN0->isVolatile(), LN0->isNonTemporal(),
   6240                                  LN0->isInvariant(), OrigAlign,
   6241                                  LN0->getTBAAInfo());
   6242       AddToWorkList(N);
   6243       CombineTo(N0.getNode(),
   6244                 DAG.getNode(ISD::BITCAST, SDLoc(N0),
   6245                             N0.getValueType(), Load),
   6246                 Load.getValue(1));
   6247       return Load;
   6248     }
   6249   }
   6250 
   6251   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
   6252   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
   6253   // This often reduces constant pool loads.
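          // For example (illustrative): (i32 (bitcast (fneg f32 x))) becomes
          // (xor (i32 (bitcast x)), 0x80000000), flipping only the sign bit.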
   6254   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
   6255        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
   6256       N0.getNode()->hasOneUse() && VT.isInteger() &&
   6257       !VT.isVector() && !N0.getValueType().isVector()) {
   6258     SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT,
   6259                                   N0.getOperand(0));
   6260     AddToWorkList(NewConv.getNode());
   6261 
   6262     APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
   6263     if (N0.getOpcode() == ISD::FNEG)
   6264       return DAG.getNode(ISD::XOR, SDLoc(N), VT,
   6265                          NewConv, DAG.getConstant(SignBit, VT));
   6266     assert(N0.getOpcode() == ISD::FABS);
   6267     return DAG.getNode(ISD::AND, SDLoc(N), VT,
   6268                        NewConv, DAG.getConstant(~SignBit, VT));
   6269   }
   6270 
   6271   // fold (bitconvert (fcopysign cst, x)) ->
   6272   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
   6273   // Note that we don't handle (copysign x, cst) because this can always be
   6274   // folded to an fneg or fabs.
   6275   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
   6276       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
   6277       VT.isInteger() && !VT.isVector()) {
   6278     unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
   6279     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
   6280     if (isTypeLegal(IntXVT)) {
   6281       SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0),
   6282                               IntXVT, N0.getOperand(1));
   6283       AddToWorkList(X.getNode());
   6284 
   6285       // If X has a different width than the result/lhs, sext it or truncate it.
   6286       unsigned VTWidth = VT.getSizeInBits();
   6287       if (OrigXWidth < VTWidth) {
   6288         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
   6289         AddToWorkList(X.getNode());
   6290       } else if (OrigXWidth > VTWidth) {
   6291         // To get the sign bit in the right place, we have to shift it right
   6292         // before truncating.
   6293         X = DAG.getNode(ISD::SRL, SDLoc(X),
   6294                         X.getValueType(), X,
   6295                         DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
   6296         AddToWorkList(X.getNode());
   6297         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
   6298         AddToWorkList(X.getNode());
   6299       }
   6300 
   6301       APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
   6302       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
   6303                       X, DAG.getConstant(SignBit, VT));
   6304       AddToWorkList(X.getNode());
   6305 
   6306       SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0),
   6307                                 VT, N0.getOperand(0));
   6308       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
   6309                         Cst, DAG.getConstant(~SignBit, VT));
   6310       AddToWorkList(Cst.getNode());
   6311 
   6312       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
   6313     }
   6314   }
   6315 
   6316   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
   6317   if (N0.getOpcode() == ISD::BUILD_PAIR) {
   6318     SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
   6319     if (CombineLD.getNode())
   6320       return CombineLD;
   6321   }
   6322 
   6323   return SDValue();
   6324 }
   6325 
   6326 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
   6327   EVT VT = N->getValueType(0);
   6328   return CombineConsecutiveLoads(N, VT);
   6329 }
   6330 
   6331 /// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector
   6332 /// node with Constant, ConstantFP or Undef operands.  DstEltVT indicates the
   6333 /// destination element value type.
   6334 SDValue DAGCombiner::
   6335 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
   6336   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
   6337 
   6338   // If this is already the right type, we're done.
   6339   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
   6340 
   6341   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
   6342   unsigned DstBitSize = DstEltVT.getSizeInBits();
   6343 
   6344   // If this is a conversion of N elements of one type to N elements of another
   6345   // type, convert each element.  This handles FP<->INT cases.
   6346   if (SrcBitSize == DstBitSize) {
   6347     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
   6348                               BV->getValueType(0).getVectorNumElements());
   6349 
   6350     // Due to the FP element handling below calling this routine recursively,
   6351     // we can end up with a scalar-to-vector node here.
   6352     if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
   6353       return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
   6354                          DAG.getNode(ISD::BITCAST, SDLoc(BV),
   6355                                      DstEltVT, BV->getOperand(0)));
   6356 
   6357     SmallVector<SDValue, 8> Ops;
   6358     for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
   6359       SDValue Op = BV->getOperand(i);
   6360       // If the vector element type is not legal, the BUILD_VECTOR operands
   6361       // are promoted and implicitly truncated.  Make that explicit here.
   6362       if (Op.getValueType() != SrcEltVT)
   6363         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
   6364       Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV),
   6365                                 DstEltVT, Op));
   6366       AddToWorkList(Ops.back().getNode());
   6367     }
   6368     return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
   6369   }
   6370 
   6371   // Otherwise, we're growing or shrinking the elements.  To avoid having to
   6372   // handle annoying details of growing/shrinking FP values, we convert them to
   6373   // int first.
   6374   if (SrcEltVT.isFloatingPoint()) {
   6375     // Convert the input float vector to an int vector whose elements are the
   6376     // same size.
   6377     assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
   6378     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
   6379     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
   6380     SrcEltVT = IntVT;
   6381   }
   6382 
   6383   // Now we know the input is an integer vector.  If the output is an FP type,
   6384   // convert to integer first, then to FP of the right size.
   6385   if (DstEltVT.isFloatingPoint()) {
   6386     assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
   6387     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
   6388     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
   6389 
   6390     // Next, convert to FP elements of the same size.
   6391     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
   6392   }
   6393 
   6394   // Okay, we know the src/dst types are both integers of differing sizes.
   6395   // Handle growing first.
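          // For example (illustrative): bitcasting (v4i16 build_vector 1, 2, 3, 4) to
          // v2i32 on a little-endian target packs each pair of elements into one wider
          // constant, giving (v2i32 build_vector 0x20001, 0x40003).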
   6396   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
   6397   if (SrcBitSize < DstBitSize) {
   6398     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
   6399 
   6400     SmallVector<SDValue, 8> Ops;
   6401     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
   6402          i += NumInputsPerOutput) {
   6403       bool isLE = TLI.isLittleEndian();
   6404       APInt NewBits = APInt(DstBitSize, 0);
   6405       bool EltIsUndef = true;
   6406       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
   6407         // Shift the previously computed bits over.
   6408         NewBits <<= SrcBitSize;
   6409         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
   6410         if (Op.getOpcode() == ISD::UNDEF) continue;
   6411         EltIsUndef = false;
   6412 
   6413         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
   6414                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
   6415       }
   6416 
   6417       if (EltIsUndef)
   6418         Ops.push_back(DAG.getUNDEF(DstEltVT));
   6419       else
   6420         Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
   6421     }
   6422 
   6423     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
   6424     return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
   6425   }
   6426 
   6427   // Finally, this must be the case where we are shrinking elements: each input
   6428   // turns into multiple outputs.
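          // For example (illustrative): bitcasting (v2i32 build_vector 0x20001, 0x40003)
          // to v4i16 on a little-endian target yields (build_vector 1, 2, 3, 4); on
          // big-endian targets each group of pieces is reversed below.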
   6429   bool isS2V = ISD::isScalarToVector(BV);
   6430   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
   6431   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
   6432                             NumOutputsPerInput*BV->getNumOperands());
   6433   SmallVector<SDValue, 8> Ops;
   6434 
   6435   for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
   6436     if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
   6437       for (unsigned j = 0; j != NumOutputsPerInput; ++j)
   6438         Ops.push_back(DAG.getUNDEF(DstEltVT));
   6439       continue;
   6440     }
   6441 
   6442     APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
   6443                   getAPIntValue().zextOrTrunc(SrcBitSize);
   6444 
   6445     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
   6446       APInt ThisVal = OpVal.trunc(DstBitSize);
   6447       Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
   6448       if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
   6449         // Simply turn this into a SCALAR_TO_VECTOR of the new type.
   6450         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
   6451                            Ops[0]);
   6452       OpVal = OpVal.lshr(DstBitSize);
   6453     }
   6454 
   6455     // For big endian targets, swap the order of the pieces of each element.
   6456     if (TLI.isBigEndian())
   6457       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
   6458   }
   6459 
   6460   return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
   6461 }
   6462 
   6463 SDValue DAGCombiner::visitFADD(SDNode *N) {
   6464   SDValue N0 = N->getOperand(0);
   6465   SDValue N1 = N->getOperand(1);
   6466   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   6467   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   6468   EVT VT = N->getValueType(0);
   6469 
   6470   // fold vector ops
   6471   if (VT.isVector()) {
   6472     SDValue FoldedVOp = SimplifyVBinOp(N);
   6473     if (FoldedVOp.getNode()) return FoldedVOp;
   6474   }
   6475 
   6476   // fold (fadd c1, c2) -> c1 + c2
   6477   if (N0CFP && N1CFP)
   6478     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1);
   6479   // canonicalize constant to RHS
   6480   if (N0CFP && !N1CFP)
   6481     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0);
   6482   // fold (fadd A, 0) -> A
   6483   if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
   6484       N1CFP->getValueAPF().isZero())
   6485     return N0;
   6486   // fold (fadd A, (fneg B)) -> (fsub A, B)
   6487   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
   6488     isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
   6489     return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0,
   6490                        GetNegatedExpression(N1, DAG, LegalOperations));
   6491   // fold (fadd (fneg A), B) -> (fsub B, A)
   6492   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
   6493     isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
   6494     return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1,
   6495                        GetNegatedExpression(N0, DAG, LegalOperations));
   6496 
   6497   // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
   6498   if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
   6499       N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
   6500       isa<ConstantFPSDNode>(N0.getOperand(1)))
   6501     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0),
   6502                        DAG.getNode(ISD::FADD, SDLoc(N), VT,
   6503                                    N0.getOperand(1), N1));
   6504 
   6505   // No FP constant should be created after legalization, as the Instruction
   6506   // Selection pass has a hard time dealing with FP constants.
   6507   //
   6508   // We don't need to test this condition for transformations like the following,
   6509   // as the DAG being transformed implies it is legal to take an FP constant as
   6510   // an operand.
   6511   //
   6512   //  (fadd (fmul c, x), x) -> (fmul c+1, x)
   6513   //
   6514   bool AllowNewFpConst = (Level < AfterLegalizeDAG);
   6515 
   6516   // If allowed, fold (fadd (fneg x), x) -> 0.0
   6517   if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
   6518       N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
   6519     return DAG.getConstantFP(0.0, VT);
   6520 
   6521   // If allowed, fold (fadd x, (fneg x)) -> 0.0
   6522   if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
   6523       N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
   6524     return DAG.getConstantFP(0.0, VT);
   6525 
   6526   // In unsafe math mode, we can fold chains of FADD's of the same value
   6527   // into multiplications.  This transform is not safe in general because
   6528   // we are reducing the number of rounding steps.
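          // For example (illustrative): (fadd (fadd x, x), (fadd x, x)) becomes
          // (fmul x, 4.0) below, performing one rounding step instead of three.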
   6529   if (DAG.getTarget().Options.UnsafeFPMath &&
   6530       TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
   6531       !N0CFP && !N1CFP) {
   6532     if (N0.getOpcode() == ISD::FMUL) {
   6533       ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
   6534       ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
   6535 
   6536       // (fadd (fmul c, x), x) -> (fmul x, c+1)
   6537       if (CFP00 && !CFP01 && N0.getOperand(1) == N1) {
   6538         SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
   6539                                      SDValue(CFP00, 0),
   6540                                      DAG.getConstantFP(1.0, VT));
   6541         return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
   6542                            N1, NewCFP);
   6543       }
   6544 
   6545       // (fadd (fmul x, c), x) -> (fmul x, c+1)
   6546       if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
   6547         SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
   6548                                      SDValue(CFP01, 0),
   6549                                      DAG.getConstantFP(1.0, VT));
   6550         return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
   6551                            N1, NewCFP);
   6552       }
   6553 
   6554       // (fadd (fmul c, x), (fadd x, x)) -> (fmul x, c+2)
   6555       if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD &&
   6556           N1.getOperand(0) == N1.getOperand(1) &&
   6557           N0.getOperand(1) == N1.getOperand(0)) {
   6558         SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
   6559                                      SDValue(CFP00, 0),
   6560                                      DAG.getConstantFP(2.0, VT));
   6561         return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
   6562                            N0.getOperand(1), NewCFP);
   6563       }
   6564 
   6565       // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
   6566       if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
   6567           N1.getOperand(0) == N1.getOperand(1) &&
   6568           N0.getOperand(0) == N1.getOperand(0)) {
   6569         SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
   6570                                      SDValue(CFP01, 0),
   6571                                      DAG.getConstantFP(2.0, VT));
   6572         return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
   6573                            N0.getOperand(0), NewCFP);
   6574       }
   6575     }
   6576 
   6577     if (N1.getOpcode() == ISD::FMUL) {
   6578       ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
   6579       ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
   6580 
   6581       // (fadd x, (fmul c, x)) -> (fmul x, c+1)
   6582       if (CFP10 && !CFP11 && N1.getOperand(1) == N0) {
   6583         SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
   6584                                      SDValue(CFP10, 0),
   6585                                      DAG.getConstantFP(1.0, VT));
   6586         return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
   6587                            N0, NewCFP);
   6588       }
   6589 
   6590       // (fadd x, (fmul x, c)) -> (fmul x, c+1)
   6591       if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
   6592         SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
   6593                                      SDValue(CFP11, 0),
   6594                                      DAG.getConstantFP(1.0, VT));
   6595         return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
   6596                            N0, NewCFP);
   6597       }
   6598 
   6599 
   6600       // (fadd (fadd x, x), (fmul c, x)) -> (fmul x, c+2)
   6601       if (CFP10 && !CFP11 && N0.getOpcode() == ISD::FADD &&
   6602           N0.getOperand(0) == N0.getOperand(1) &&
   6603           N1.getOperand(1) == N0.getOperand(0)) {
   6604         SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
   6605                                      SDValue(CFP10, 0),
   6606                                      DAG.getConstantFP(2.0, VT));
   6607         return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
   6608                            N1.getOperand(1), NewCFP);
   6609       }
   6610 
   6611       // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
   6612       if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
   6613           N0.getOperand(0) == N0.getOperand(1) &&
   6614           N1.getOperand(0) == N0.getOperand(0)) {
   6615         SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
   6616                                      SDValue(CFP11, 0),
   6617                                      DAG.getConstantFP(2.0, VT));
   6618         return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
   6619                            N1.getOperand(0), NewCFP);
   6620       }
   6621     }
   6622 
   6623     if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) {
   6624       ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
   6625       // (fadd (fadd x, x), x) -> (fmul x, 3.0)
   6626       if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
   6627           (N0.getOperand(0) == N1))
   6628         return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
   6629                            N1, DAG.getConstantFP(3.0, VT));
   6630     }
   6631 
   6632     if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) {
   6633       ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
   6634       // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
   6635       if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
   6636           N1.getOperand(0) == N0)
   6637         return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
   6638                            N0, DAG.getConstantFP(3.0, VT));
   6639     }
   6640 
   6641     // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
   6642     if (AllowNewFpConst &&
   6643         N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
   6644         N0.getOperand(0) == N0.getOperand(1) &&
   6645         N1.getOperand(0) == N1.getOperand(1) &&
   6646         N0.getOperand(0) == N1.getOperand(0))
   6647       return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
   6648                          N0.getOperand(0),
   6649                          DAG.getConstantFP(4.0, VT));
   6650   }
   6651 
   6652   // FADD -> FMA combines:
   6653   if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
   6654        DAG.getTarget().Options.UnsafeFPMath) &&
   6655       DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
   6656       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
   6657 
   6658     // fold (fadd (fmul x, y), z) -> (fma x, y, z)
   6659     if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
   6660       return DAG.getNode(ISD::FMA, SDLoc(N), VT,
   6661                          N0.getOperand(0), N0.getOperand(1), N1);
   6662 
   6663     // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
   6664     // Note: Commutes FADD operands.
   6665     if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
   6666       return DAG.getNode(ISD::FMA, SDLoc(N), VT,
   6667                          N1.getOperand(0), N1.getOperand(1), N0);
   6668   }
   6669 
   6670   return SDValue();
   6671 }
   6672 
   6673 SDValue DAGCombiner::visitFSUB(SDNode *N) {
   6674   SDValue N0 = N->getOperand(0);
   6675   SDValue N1 = N->getOperand(1);
   6676   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   6677   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   6678   EVT VT = N->getValueType(0);
   6679   SDLoc dl(N);
   6680 
   6681   // fold vector ops
   6682   if (VT.isVector()) {
   6683     SDValue FoldedVOp = SimplifyVBinOp(N);
   6684     if (FoldedVOp.getNode()) return FoldedVOp;
   6685   }
   6686 
   6687   // fold (fsub c1, c2) -> c1-c2
   6688   if (N0CFP && N1CFP)
   6689     return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1);
   6690   // fold (fsub A, 0) -> A
   6691   if (DAG.getTarget().Options.UnsafeFPMath &&
   6692       N1CFP && N1CFP->getValueAPF().isZero())
   6693     return N0;
   6694   // fold (fsub 0, B) -> -B
   6695   if (DAG.getTarget().Options.UnsafeFPMath &&
   6696       N0CFP && N0CFP->getValueAPF().isZero()) {
   6697     if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
   6698       return GetNegatedExpression(N1, DAG, LegalOperations);
   6699     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
   6700       return DAG.getNode(ISD::FNEG, dl, VT, N1);
   6701   }
   6702   // fold (fsub A, (fneg B)) -> (fadd A, B)
   6703   if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
   6704     return DAG.getNode(ISD::FADD, dl, VT, N0,
   6705                        GetNegatedExpression(N1, DAG, LegalOperations));
   6706 
   6707   // If 'unsafe math' is enabled, fold
   6708   //    (fsub x, x) -> 0.0 &
   6709   //    (fsub x, (fadd x, y)) -> (fneg y) &
   6710   //    (fsub x, (fadd y, x)) -> (fneg y)
   6711   if (DAG.getTarget().Options.UnsafeFPMath) {
   6712     if (N0 == N1)
   6713       return DAG.getConstantFP(0.0f, VT);
   6714 
   6715     if (N1.getOpcode() == ISD::FADD) {
   6716       SDValue N10 = N1->getOperand(0);
   6717       SDValue N11 = N1->getOperand(1);
   6718 
   6719       if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI,
   6720                                           &DAG.getTarget().Options))
   6721         return GetNegatedExpression(N11, DAG, LegalOperations);
   6722 
   6723       if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI,
   6724                                           &DAG.getTarget().Options))
   6725         return GetNegatedExpression(N10, DAG, LegalOperations);
   6726     }
   6727   }
   6728 
   6729   // FSUB -> FMA combines:
   6730   if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
   6731        DAG.getTarget().Options.UnsafeFPMath) &&
   6732       DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
   6733       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
   6734 
   6735     // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
   6736     if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
   6737       return DAG.getNode(ISD::FMA, dl, VT,
   6738                          N0.getOperand(0), N0.getOperand(1),
   6739                          DAG.getNode(ISD::FNEG, dl, VT, N1));
   6740 
   6741     // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
   6742     // Note: Commutes FSUB operands.
   6743     if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
   6744       return DAG.getNode(ISD::FMA, dl, VT,
   6745                          DAG.getNode(ISD::FNEG, dl, VT,
   6746                          N1.getOperand(0)),
   6747                          N1.getOperand(1), N0);
   6748 
   6749     // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
   6750     if (N0.getOpcode() == ISD::FNEG &&
   6751         N0.getOperand(0).getOpcode() == ISD::FMUL &&
   6752         N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
   6753       SDValue N00 = N0.getOperand(0).getOperand(0);
   6754       SDValue N01 = N0.getOperand(0).getOperand(1);
   6755       return DAG.getNode(ISD::FMA, dl, VT,
   6756                          DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
   6757                          DAG.getNode(ISD::FNEG, dl, VT, N1));
   6758     }
   6759   }
   6760 
   6761   return SDValue();
   6762 }
   6763 
   6764 SDValue DAGCombiner::visitFMUL(SDNode *N) {
   6765   SDValue N0 = N->getOperand(0);
   6766   SDValue N1 = N->getOperand(1);
   6767   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   6768   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   6769   EVT VT = N->getValueType(0);
   6770   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   6771 
   6772   // fold vector ops
   6773   if (VT.isVector()) {
   6774     SDValue FoldedVOp = SimplifyVBinOp(N);
   6775     if (FoldedVOp.getNode()) return FoldedVOp;
   6776   }
   6777 
   6778   // fold (fmul c1, c2) -> c1*c2
   6779   if (N0CFP && N1CFP)
   6780     return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1);
   6781   // canonicalize constant to RHS
   6782   if (N0CFP && !N1CFP)
   6783     return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0);
   6784   // fold (fmul A, 0) -> 0
   6785   if (DAG.getTarget().Options.UnsafeFPMath &&
   6786       N1CFP && N1CFP->getValueAPF().isZero())
   6787     return N1;
   6788   // fold (fmul A, 0) -> 0, vector edition.
   6789   if (DAG.getTarget().Options.UnsafeFPMath &&
   6790       ISD::isBuildVectorAllZeros(N1.getNode()))
   6791     return N1;
   6792   // fold (fmul A, 1.0) -> A
   6793   if (N1CFP && N1CFP->isExactlyValue(1.0))
   6794     return N0;
   6795   // fold (fmul X, 2.0) -> (fadd X, X)
   6796   if (N1CFP && N1CFP->isExactlyValue(+2.0))
   6797     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0);
   6798   // fold (fmul X, -1.0) -> (fneg X)
   6799   if (N1CFP && N1CFP->isExactlyValue(-1.0))
   6800     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
   6801       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
   6802 
   6803   // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
   6804   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
   6805                                        &DAG.getTarget().Options)) {
   6806     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
   6807                                          &DAG.getTarget().Options)) {
   6808       // Both can be negated for free; check to see if at least one is cheaper
   6809       // negated.
   6810       if (LHSNeg == 2 || RHSNeg == 2)
   6811         return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
   6812                            GetNegatedExpression(N0, DAG, LegalOperations),
   6813                            GetNegatedExpression(N1, DAG, LegalOperations));
   6814     }
   6815   }
   6816 
   6817   // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
   6818   if (DAG.getTarget().Options.UnsafeFPMath &&
   6819       N1CFP && N0.getOpcode() == ISD::FMUL &&
   6820       N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
   6821     return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
   6822                        DAG.getNode(ISD::FMUL, SDLoc(N), VT,
   6823                                    N0.getOperand(1), N1));
   6824 
   6825   return SDValue();
   6826 }
   6827 
   6828 SDValue DAGCombiner::visitFMA(SDNode *N) {
   6829   SDValue N0 = N->getOperand(0);
   6830   SDValue N1 = N->getOperand(1);
   6831   SDValue N2 = N->getOperand(2);
   6832   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   6833   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   6834   EVT VT = N->getValueType(0);
   6835   SDLoc dl(N);
   6836 
   6837   if (DAG.getTarget().Options.UnsafeFPMath) {
   6838     if (N0CFP && N0CFP->isZero())
   6839       return N2;
   6840     if (N1CFP && N1CFP->isZero())
   6841       return N2;
   6842   }
   6843   if (N0CFP && N0CFP->isExactlyValue(1.0))
   6844     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
   6845   if (N1CFP && N1CFP->isExactlyValue(1.0))
   6846     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
   6847 
   6848   // Canonicalize (fma c, x, y) -> (fma x, c, y)
   6849   if (N0CFP && !N1CFP)
   6850     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
   6851 
   6852   // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
   6853   if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
   6854       N2.getOpcode() == ISD::FMUL &&
   6855       N0 == N2.getOperand(0) &&
   6856       N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
   6857     return DAG.getNode(ISD::FMUL, dl, VT, N0,
   6858                        DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
   6859   }
   6860 
   6861 
   6862   // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
   6863   if (DAG.getTarget().Options.UnsafeFPMath &&
   6864       N0.getOpcode() == ISD::FMUL && N1CFP &&
   6865       N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
   6866     return DAG.getNode(ISD::FMA, dl, VT,
   6867                        N0.getOperand(0),
   6868                        DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
   6869                        N2);
   6870   }
   6871 
   6872   // (fma x, 1, y) -> (fadd x, y)
   6873   // (fma x, -1, y) -> (fadd (fneg x), y)
   6874   if (N1CFP) {
   6875     if (N1CFP->isExactlyValue(1.0))
   6876       return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
   6877 
   6878     if (N1CFP->isExactlyValue(-1.0) &&
   6879         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
   6880       SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
   6881       AddToWorkList(RHSNeg.getNode());
   6882       return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
   6883     }
   6884   }
   6885 
   6886   // (fma x, c, x) -> (fmul x, (c+1))
   6887   if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2)
   6888     return DAG.getNode(ISD::FMUL, dl, VT, N0,
   6889                        DAG.getNode(ISD::FADD, dl, VT,
   6890                                    N1, DAG.getConstantFP(1.0, VT)));
   6891 
   6892   // (fma x, c, (fneg x)) -> (fmul x, (c-1))
   6893   if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
   6894       N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
   6895     return DAG.getNode(ISD::FMUL, dl, VT, N0,
   6896                        DAG.getNode(ISD::FADD, dl, VT,
   6897                                    N1, DAG.getConstantFP(-1.0, VT)));
   6898 
   6899 
   6900   return SDValue();
   6901 }
   6902 
   6903 SDValue DAGCombiner::visitFDIV(SDNode *N) {
   6904   SDValue N0 = N->getOperand(0);
   6905   SDValue N1 = N->getOperand(1);
   6906   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   6907   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   6908   EVT VT = N->getValueType(0);
   6909   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   6910 
   6911   // fold vector ops
   6912   if (VT.isVector()) {
   6913     SDValue FoldedVOp = SimplifyVBinOp(N);
   6914     if (FoldedVOp.getNode()) return FoldedVOp;
   6915   }
   6916 
   6917   // fold (fdiv c1, c2) -> c1/c2
   6918   if (N0CFP && N1CFP)
   6919     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
   6920 
   6921   // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
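          // For example (illustrative): (fdiv x, 2.0) -> (fmul x, 0.5), as long as the
          // reciprocal is well-behaved and is a legal FP immediate for the target.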
   6922   if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) {
   6923     // Compute the reciprocal 1.0 / c2.
   6924     APFloat N1APF = N1CFP->getValueAPF();
   6925     APFloat Recip(N1APF.getSemantics(), 1); // 1.0
   6926     APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
   6927     // Only do the transform if the reciprocal is a legal fp immediate that
   6928     // isn't too nasty (e.g. NaN, denormal, ...).
   6929     if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
   6930         (!LegalOperations ||
   6931          // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
   6932          // backend)... we should handle this gracefully after Legalize.
   6933          // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
   6934          TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
   6935          TLI.isFPImmLegal(Recip, VT)))
   6936       return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0,
   6937                          DAG.getConstantFP(Recip, VT));
   6938   }
   6939 
   6940   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
   6941   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
   6942                                        &DAG.getTarget().Options)) {
   6943     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
   6944                                          &DAG.getTarget().Options)) {
   6945       // Both can be negated for free; check to see if at least one is cheaper
   6946       // negated.
   6947       if (LHSNeg == 2 || RHSNeg == 2)
   6948         return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
   6949                            GetNegatedExpression(N0, DAG, LegalOperations),
   6950                            GetNegatedExpression(N1, DAG, LegalOperations));
   6951     }
   6952   }
   6953 
   6954   return SDValue();
   6955 }
   6956 
   6957 SDValue DAGCombiner::visitFREM(SDNode *N) {
   6958   SDValue N0 = N->getOperand(0);
   6959   SDValue N1 = N->getOperand(1);
   6960   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   6961   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   6962   EVT VT = N->getValueType(0);
   6963 
   6964   // fold (frem c1, c2) -> fmod(c1,c2)
   6965   if (N0CFP && N1CFP)
   6966     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
   6967 
   6968   return SDValue();
   6969 }
   6970 
   6971 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
   6972   SDValue N0 = N->getOperand(0);
   6973   SDValue N1 = N->getOperand(1);
   6974   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   6975   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   6976   EVT VT = N->getValueType(0);
   6977 
   6978   if (N0CFP && N1CFP)  // Constant fold
   6979     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
   6980 
   6981   if (N1CFP) {
   6982     const APFloat& V = N1CFP->getValueAPF();
   6983     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
   6984     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
   6985     if (!V.isNegative()) {
   6986       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
   6987         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
   6988     } else {
   6989       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
   6990         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
   6991                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
   6992     }
   6993   }
   6994 
   6995   // copysign(fabs(x), y) -> copysign(x, y)
   6996   // copysign(fneg(x), y) -> copysign(x, y)
   6997   // copysign(copysign(x,z), y) -> copysign(x, y)
   6998   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
   6999       N0.getOpcode() == ISD::FCOPYSIGN)
   7000     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
   7001                        N0.getOperand(0), N1);
   7002 
   7003   // copysign(x, abs(y)) -> abs(x)
   7004   if (N1.getOpcode() == ISD::FABS)
   7005     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
   7006 
   7007   // copysign(x, copysign(y,z)) -> copysign(x, z)
   7008   if (N1.getOpcode() == ISD::FCOPYSIGN)
   7009     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
   7010                        N0, N1.getOperand(1));
   7011 
   7012   // copysign(x, fp_extend(y)) -> copysign(x, y)
   7013   // copysign(x, fp_round(y)) -> copysign(x, y)
   7014   if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
   7015     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
   7016                        N0, N1.getOperand(0));
   7017 
   7018   return SDValue();
   7019 }
   7020 
   7021 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
   7022   SDValue N0 = N->getOperand(0);
   7023   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   7024   EVT VT = N->getValueType(0);
   7025   EVT OpVT = N0.getValueType();
   7026 
   7027   // fold (sint_to_fp c1) -> c1fp
   7028   if (N0C &&
   7029       // ...but only if the target supports immediate floating-point values
   7030       (!LegalOperations ||
   7031        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
   7032     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
   7033 
   7034   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
   7035   // but UINT_TO_FP is legal on this target, try to convert.
   7036   if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
   7037       TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
   7038     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
   7039     if (DAG.SignBitIsZero(N0))
   7040       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
   7041   }
   7042 
   7043   // The next optimizations are desirable only if SELECT_CC can be lowered.
   7044   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
   7045     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
   7046     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
   7047         !VT.isVector() &&
   7048         (!LegalOperations ||
   7049          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
   7050       SDValue Ops[] =
   7051         { N0.getOperand(0), N0.getOperand(1),
   7052           DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT),
   7053           N0.getOperand(2) };
   7054       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
   7055     }
   7056 
   7057     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
   7058     //      (select_cc x, y, 1.0, 0.0, cc)
   7059     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
   7060         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
   7061         (!LegalOperations ||
   7062          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
   7063       SDValue Ops[] =
   7064         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
   7065           DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT),
   7066           N0.getOperand(0).getOperand(2) };
   7067       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
   7068     }
   7069   }
   7070 
   7071   return SDValue();
   7072 }
   7073 
   7074 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
   7075   SDValue N0 = N->getOperand(0);
   7076   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   7077   EVT VT = N->getValueType(0);
   7078   EVT OpVT = N0.getValueType();
   7079 
   7080   // fold (uint_to_fp c1) -> c1fp
   7081   if (N0C &&
   7082       // ...but only if the target supports immediate floating-point values
   7083       (!LegalOperations ||
   7084        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
   7085     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
   7086 
   7087   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
   7088   // but SINT_TO_FP is legal on this target, try to convert.
   7089   if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
   7090       TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
   7091     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
   7092     if (DAG.SignBitIsZero(N0))
   7093       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
   7094   }
   7095 
   7096   // The next optimizations are desirable only if SELECT_CC can be lowered.
   7097   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
   7098     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
   7099 
   7100     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
   7101         (!LegalOperations ||
   7102          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
   7103       SDValue Ops[] =
   7104         { N0.getOperand(0), N0.getOperand(1),
   7105           DAG.getConstantFP(1.0, VT),  DAG.getConstantFP(0.0, VT),
   7106           N0.getOperand(2) };
   7107       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
   7108     }
   7109   }
   7110 
   7111   return SDValue();
   7112 }
   7113 
   7114 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
   7115   SDValue N0 = N->getOperand(0);
   7116   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   7117   EVT VT = N->getValueType(0);
   7118 
   7119   // fold (fp_to_sint c1fp) -> c1
   7120   if (N0CFP)
   7121     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
   7122 
   7123   return SDValue();
   7124 }
   7125 
   7126 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
   7127   SDValue N0 = N->getOperand(0);
   7128   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   7129   EVT VT = N->getValueType(0);
   7130 
   7131   // fold (fp_to_uint c1fp) -> c1
   7132   if (N0CFP)
   7133     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
   7134 
   7135   return SDValue();
   7136 }
   7137 
   7138 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
   7139   SDValue N0 = N->getOperand(0);
   7140   SDValue N1 = N->getOperand(1);
   7141   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   7142   EVT VT = N->getValueType(0);
   7143 
   7144   // fold (fp_round c1fp) -> c1fp
   7145   if (N0CFP)
   7146     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
   7147 
   7148   // fold (fp_round (fp_extend x)) -> x
   7149   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
   7150     return N0.getOperand(0);
   7151 
   7152   // fold (fp_round (fp_round x)) -> (fp_round x)
   7153   if (N0.getOpcode() == ISD::FP_ROUND) {
   7154     // This is a value-preserving truncation if both rounds are.
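            // (The second FP_ROUND operand is 1 when the rounding is known not to
            // change the value, and 0 otherwise.)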
   7155     bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
   7156                    N0.getNode()->getConstantOperandVal(1) == 1;
   7157     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0),
   7158                        DAG.getIntPtrConstant(IsTrunc));
   7159   }
   7160 
   7161   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
   7162   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
   7163     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
   7164                               N0.getOperand(0), N1);
   7165     AddToWorkList(Tmp.getNode());
   7166     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
   7167                        Tmp, N0.getOperand(1));
   7168   }
   7169 
   7170   return SDValue();
   7171 }
   7172 
   7173 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
   7174   SDValue N0 = N->getOperand(0);
   7175   EVT VT = N->getValueType(0);
   7176   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
   7177   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   7178 
   7179   // fold (fp_round_inreg c1fp) -> c1fp
   7180   if (N0CFP && isTypeLegal(EVT)) {
   7181     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
   7182     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round);
   7183   }
   7184 
   7185   return SDValue();
   7186 }
   7187 
   7188 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
   7189   SDValue N0 = N->getOperand(0);
   7190   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   7191   EVT VT = N->getValueType(0);
   7192 
   7193   // If this is fp_round(fpextend), don't fold it; allow ourselves to be folded.
   7194   if (N->hasOneUse() &&
   7195       N->use_begin()->getOpcode() == ISD::FP_ROUND)
   7196     return SDValue();
   7197 
   7198   // fold (fp_extend c1fp) -> c1fp
   7199   if (N0CFP)
   7200     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
   7201 
    7202   // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
   7203   // value of X.
   7204   if (N0.getOpcode() == ISD::FP_ROUND
   7205       && N0.getNode()->getConstantOperandVal(1) == 1) {
   7206     SDValue In = N0.getOperand(0);
   7207     if (In.getValueType() == VT) return In;
   7208     if (VT.bitsLT(In.getValueType()))
   7209       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
   7210                          In, N0.getOperand(1));
   7211     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
   7212   }
   7213 
   7214   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
   7215   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
   7216       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
   7217        TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
   7218     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   7219     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
   7220                                      LN0->getChain(),
   7221                                      LN0->getBasePtr(), N0.getValueType(),
   7222                                      LN0->getMemOperand());
   7223     CombineTo(N, ExtLoad);
   7224     CombineTo(N0.getNode(),
   7225               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
   7226                           N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
   7227               ExtLoad.getValue(1));
   7228     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   7229   }
   7230 
   7231   return SDValue();
   7232 }
   7233 
   7234 SDValue DAGCombiner::visitFNEG(SDNode *N) {
   7235   SDValue N0 = N->getOperand(0);
   7236   EVT VT = N->getValueType(0);
   7237 
   7238   if (VT.isVector()) {
   7239     SDValue FoldedVOp = SimplifyVUnaryOp(N);
   7240     if (FoldedVOp.getNode()) return FoldedVOp;
   7241   }
   7242 
   7243   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
   7244                          &DAG.getTarget().Options))
   7245     return GetNegatedExpression(N0, DAG, LegalOperations);
   7246 
   7247   // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
   7248   // constant pool values.
   7249   if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST &&
   7250       !VT.isVector() &&
   7251       N0.getNode()->hasOneUse() &&
   7252       N0.getOperand(0).getValueType().isInteger()) {
   7253     SDValue Int = N0.getOperand(0);
   7254     EVT IntVT = Int.getValueType();
   7255     if (IntVT.isInteger() && !IntVT.isVector()) {
   7256       Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int,
   7257               DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
   7258       AddToWorkList(Int.getNode());
   7259       return DAG.getNode(ISD::BITCAST, SDLoc(N),
   7260                          VT, Int);
   7261     }
   7262   }
   7263 
   7264   // (fneg (fmul c, x)) -> (fmul -c, x)
   7265   if (N0.getOpcode() == ISD::FMUL) {
   7266     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
   7267     if (CFP1) {
   7268       APFloat CVal = CFP1->getValueAPF();
   7269       CVal.changeSign();
   7270       if (Level >= AfterLegalizeDAG &&
   7271           (TLI.isFPImmLegal(CVal, N->getValueType(0)) ||
   7272            TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0))))
   7273         return DAG.getNode(
   7274             ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
   7275             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)));
   7276     }
   7277   }
   7278 
   7279   return SDValue();
   7280 }
   7281 
   7282 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
   7283   SDValue N0 = N->getOperand(0);
   7284   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   7285   EVT VT = N->getValueType(0);
   7286 
   7287   // fold (fceil c1) -> fceil(c1)
   7288   if (N0CFP)
   7289     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
   7290 
   7291   return SDValue();
   7292 }
   7293 
   7294 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
   7295   SDValue N0 = N->getOperand(0);
   7296   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   7297   EVT VT = N->getValueType(0);
   7298 
   7299   // fold (ftrunc c1) -> ftrunc(c1)
   7300   if (N0CFP)
   7301     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
   7302 
   7303   return SDValue();
   7304 }
   7305 
   7306 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
   7307   SDValue N0 = N->getOperand(0);
   7308   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   7309   EVT VT = N->getValueType(0);
   7310 
   7311   // fold (ffloor c1) -> ffloor(c1)
   7312   if (N0CFP)
   7313     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
   7314 
   7315   return SDValue();
   7316 }
   7317 
   7318 SDValue DAGCombiner::visitFABS(SDNode *N) {
   7319   SDValue N0 = N->getOperand(0);
   7320   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   7321   EVT VT = N->getValueType(0);
   7322 
   7323   if (VT.isVector()) {
   7324     SDValue FoldedVOp = SimplifyVUnaryOp(N);
   7325     if (FoldedVOp.getNode()) return FoldedVOp;
   7326   }
   7327 
   7328   // fold (fabs c1) -> fabs(c1)
   7329   if (N0CFP)
   7330     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
   7331   // fold (fabs (fabs x)) -> (fabs x)
   7332   if (N0.getOpcode() == ISD::FABS)
   7333     return N->getOperand(0);
   7334   // fold (fabs (fneg x)) -> (fabs x)
   7335   // fold (fabs (fcopysign x, y)) -> (fabs x)
   7336   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
   7337     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
   7338 
   7339   // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
   7340   // constant pool values.
   7341   if (!TLI.isFAbsFree(VT) &&
   7342       N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
   7343       N0.getOperand(0).getValueType().isInteger() &&
   7344       !N0.getOperand(0).getValueType().isVector()) {
   7345     SDValue Int = N0.getOperand(0);
   7346     EVT IntVT = Int.getValueType();
   7347     if (IntVT.isInteger() && !IntVT.isVector()) {
   7348       Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int,
   7349              DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
   7350       AddToWorkList(Int.getNode());
   7351       return DAG.getNode(ISD::BITCAST, SDLoc(N),
   7352                          N->getValueType(0), Int);
   7353     }
   7354   }
   7355 
   7356   return SDValue();
   7357 }
   7358 
   7359 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
   7360   SDValue Chain = N->getOperand(0);
   7361   SDValue N1 = N->getOperand(1);
   7362   SDValue N2 = N->getOperand(2);
   7363 
   7364   // If N is a constant we could fold this into a fallthrough or unconditional
    7365   // branch. However, that doesn't happen very often in normal code, because
   7366   // Instcombine/SimplifyCFG should have handled the available opportunities.
   7367   // If we did this folding here, it would be necessary to update the
   7368   // MachineBasicBlock CFG, which is awkward.
   7369 
   7370   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
   7371   // on the target.
   7372   if (N1.getOpcode() == ISD::SETCC &&
   7373       TLI.isOperationLegalOrCustom(ISD::BR_CC,
   7374                                    N1.getOperand(0).getValueType())) {
   7375     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
   7376                        Chain, N1.getOperand(2),
   7377                        N1.getOperand(0), N1.getOperand(1), N2);
   7378   }
   7379 
   7380   if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
   7381       ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
   7382        (N1.getOperand(0).hasOneUse() &&
   7383         N1.getOperand(0).getOpcode() == ISD::SRL))) {
   7384     SDNode *Trunc = nullptr;
   7385     if (N1.getOpcode() == ISD::TRUNCATE) {
    7386       // Look past the truncate.
   7387       Trunc = N1.getNode();
   7388       N1 = N1.getOperand(0);
   7389     }
   7390 
   7391     // Match this pattern so that we can generate simpler code:
   7392     //
   7393     //   %a = ...
   7394     //   %b = and i32 %a, 2
   7395     //   %c = srl i32 %b, 1
   7396     //   brcond i32 %c ...
   7397     //
   7398     // into
   7399     //
   7400     //   %a = ...
   7401     //   %b = and i32 %a, 2
   7402     //   %c = setcc eq %b, 0
   7403     //   brcond %c ...
   7404     //
   7405     // This applies only when the AND constant value has one bit set and the
   7406     // SRL constant is equal to the log2 of the AND constant. The back-end is
   7407     // smart enough to convert the result into a TEST/JMP sequence.
   7408     SDValue Op0 = N1.getOperand(0);
   7409     SDValue Op1 = N1.getOperand(1);
   7410 
   7411     if (Op0.getOpcode() == ISD::AND &&
   7412         Op1.getOpcode() == ISD::Constant) {
   7413       SDValue AndOp1 = Op0.getOperand(1);
   7414 
   7415       if (AndOp1.getOpcode() == ISD::Constant) {
   7416         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
   7417 
   7418         if (AndConst.isPowerOf2() &&
   7419             cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
   7420           SDValue SetCC =
   7421             DAG.getSetCC(SDLoc(N),
   7422                          getSetCCResultType(Op0.getValueType()),
   7423                          Op0, DAG.getConstant(0, Op0.getValueType()),
   7424                          ISD::SETNE);
   7425 
   7426           SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N),
   7427                                           MVT::Other, Chain, SetCC, N2);
   7428           // Don't add the new BRCond into the worklist or else SimplifySelectCC
   7429           // will convert it back to (X & C1) >> C2.
   7430           CombineTo(N, NewBRCond, false);
   7431           // Truncate is dead.
   7432           if (Trunc) {
   7433             removeFromWorkList(Trunc);
   7434             DAG.DeleteNode(Trunc);
   7435           }
   7436           // Replace the uses of SRL with SETCC
   7437           WorkListRemover DeadNodes(*this);
   7438           DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
   7439           removeFromWorkList(N1.getNode());
   7440           DAG.DeleteNode(N1.getNode());
   7441           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   7442         }
   7443       }
   7444     }
   7445 
   7446     if (Trunc)
   7447       // Restore N1 if the above transformation doesn't match.
   7448       N1 = N->getOperand(1);
   7449   }
   7450 
   7451   // Transform br(xor(x, y)) -> br(x != y)
   7452   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
   7453   if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
   7454     SDNode *TheXor = N1.getNode();
   7455     SDValue Op0 = TheXor->getOperand(0);
   7456     SDValue Op1 = TheXor->getOperand(1);
   7457     if (Op0.getOpcode() == Op1.getOpcode()) {
   7458       // Avoid missing important xor optimizations.
   7459       SDValue Tmp = visitXOR(TheXor);
   7460       if (Tmp.getNode()) {
   7461         if (Tmp.getNode() != TheXor) {
   7462           DEBUG(dbgs() << "\nReplacing.8 ";
   7463                 TheXor->dump(&DAG);
   7464                 dbgs() << "\nWith: ";
   7465                 Tmp.getNode()->dump(&DAG);
   7466                 dbgs() << '\n');
   7467           WorkListRemover DeadNodes(*this);
   7468           DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
   7469           removeFromWorkList(TheXor);
   7470           DAG.DeleteNode(TheXor);
   7471           return DAG.getNode(ISD::BRCOND, SDLoc(N),
   7472                              MVT::Other, Chain, Tmp, N2);
   7473         }
   7474 
    7475         // visitXOR has changed XOR's operands or replaced the XOR completely;
   7476         // bail out.
   7477         return SDValue(N, 0);
   7478       }
   7479     }
   7480 
   7481     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
   7482       bool Equal = false;
   7483       if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
   7484         if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
   7485             Op0.getOpcode() == ISD::XOR) {
   7486           TheXor = Op0.getNode();
   7487           Equal = true;
   7488         }
   7489 
   7490       EVT SetCCVT = N1.getValueType();
   7491       if (LegalTypes)
   7492         SetCCVT = getSetCCResultType(SetCCVT);
   7493       SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
   7494                                    SetCCVT,
   7495                                    Op0, Op1,
   7496                                    Equal ? ISD::SETEQ : ISD::SETNE);
   7497       // Replace the uses of XOR with SETCC
   7498       WorkListRemover DeadNodes(*this);
   7499       DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
   7500       removeFromWorkList(N1.getNode());
   7501       DAG.DeleteNode(N1.getNode());
   7502       return DAG.getNode(ISD::BRCOND, SDLoc(N),
   7503                          MVT::Other, Chain, SetCC, N2);
   7504     }
   7505   }
   7506 
   7507   return SDValue();
   7508 }
   7509 
   7510 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
   7511 //
   7512 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
   7513   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
   7514   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
   7515 
   7516   // If N is a constant we could fold this into a fallthrough or unconditional
    7517   // branch. However, that doesn't happen very often in normal code, because
   7518   // Instcombine/SimplifyCFG should have handled the available opportunities.
   7519   // If we did this folding here, it would be necessary to update the
   7520   // MachineBasicBlock CFG, which is awkward.
   7521 
   7522   // Use SimplifySetCC to simplify SETCC's.
   7523   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
   7524                                CondLHS, CondRHS, CC->get(), SDLoc(N),
   7525                                false);
   7526   if (Simp.getNode()) AddToWorkList(Simp.getNode());
   7527 
   7528   // fold to a simpler setcc
   7529   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
   7530     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
   7531                        N->getOperand(0), Simp.getOperand(2),
   7532                        Simp.getOperand(0), Simp.getOperand(1),
   7533                        N->getOperand(4));
   7534 
   7535   return SDValue();
   7536 }
   7537 
   7538 /// canFoldInAddressingMode - Return true if 'Use' is a load or a store that
   7539 /// uses N as its base pointer and that N may be folded in the load / store
   7540 /// addressing mode.
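         /// For illustration (a target-dependent example, not from the original
         /// comment): on a target with [reg + imm] addressing, N = (add x, 8) used
         /// as the address of a load can typically be matched as "load [x + 8]",
         /// in which case this returns true via TLI.isLegalAddressingMode.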
   7541 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
   7542                                     SelectionDAG &DAG,
   7543                                     const TargetLowering &TLI) {
   7544   EVT VT;
   7545   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
   7546     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
   7547       return false;
   7548     VT = Use->getValueType(0);
   7549   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
   7550     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
   7551       return false;
   7552     VT = ST->getValue().getValueType();
   7553   } else
   7554     return false;
   7555 
   7556   TargetLowering::AddrMode AM;
   7557   if (N->getOpcode() == ISD::ADD) {
   7558     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
   7559     if (Offset)
   7560       // [reg +/- imm]
   7561       AM.BaseOffs = Offset->getSExtValue();
   7562     else
   7563       // [reg +/- reg]
   7564       AM.Scale = 1;
   7565   } else if (N->getOpcode() == ISD::SUB) {
   7566     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
   7567     if (Offset)
   7568       // [reg +/- imm]
   7569       AM.BaseOffs = -Offset->getSExtValue();
   7570     else
   7571       // [reg +/- reg]
   7572       AM.Scale = 1;
   7573   } else
   7574     return false;
   7575 
   7576   return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()));
   7577 }
   7578 
   7579 /// CombineToPreIndexedLoadStore - Try turning a load / store into a
   7580 /// pre-indexed load / store when the base pointer is an add or subtract
   7581 /// and it has other uses besides the load / store. After the
   7582 /// transformation, the new indexed load / store has effectively folded
   7583 /// the add / subtract in and all of its other uses are redirected to the
   7584 /// new load / store.
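         /// A minimal sketch of the shape of the transformation (illustrative
         /// pseudo-DAG, not taken from the original comment):
         ///   t0 = add x, 4
         ///   v  = load t0
         ///   ... other uses of t0 ...
         /// becomes, on targets with pre-indexed loads,
         ///   v, t0' = pre_inc_load x, 4
         /// with the other uses of t0 redirected to the written-back pointer t0'.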
   7585 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
   7586   if (Level < AfterLegalizeDAG)
   7587     return false;
   7588 
   7589   bool isLoad = true;
   7590   SDValue Ptr;
   7591   EVT VT;
   7592   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
   7593     if (LD->isIndexed())
   7594       return false;
   7595     VT = LD->getMemoryVT();
   7596     if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
   7597         !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
   7598       return false;
   7599     Ptr = LD->getBasePtr();
   7600   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
   7601     if (ST->isIndexed())
   7602       return false;
   7603     VT = ST->getMemoryVT();
   7604     if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
   7605         !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
   7606       return false;
   7607     Ptr = ST->getBasePtr();
   7608     isLoad = false;
   7609   } else {
   7610     return false;
   7611   }
   7612 
   7613   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
   7614   // out.  There is no reason to make this a preinc/predec.
   7615   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
   7616       Ptr.getNode()->hasOneUse())
   7617     return false;
   7618 
   7619   // Ask the target to do addressing mode selection.
   7620   SDValue BasePtr;
   7621   SDValue Offset;
   7622   ISD::MemIndexedMode AM = ISD::UNINDEXED;
   7623   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
   7624     return false;
   7625 
   7626   // Backends without true r+i pre-indexed forms may need to pass a
   7627   // constant base with a variable offset so that constant coercion
   7628   // will work with the patterns in canonical form.
   7629   bool Swapped = false;
   7630   if (isa<ConstantSDNode>(BasePtr)) {
   7631     std::swap(BasePtr, Offset);
   7632     Swapped = true;
   7633   }
   7634 
    7635   // Don't create an indexed load / store with zero offset.
   7636   if (isa<ConstantSDNode>(Offset) &&
   7637       cast<ConstantSDNode>(Offset)->isNullValue())
   7638     return false;
   7639 
   7640   // Try turning it into a pre-indexed load / store except when:
   7641   // 1) The new base ptr is a frame index.
   7642   // 2) If N is a store and the new base ptr is either the same as or is a
   7643   //    predecessor of the value being stored.
   7644   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
   7645   //    that would create a cycle.
   7646   // 4) All uses are load / store ops that use it as old base ptr.
   7647 
   7648   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
   7649   // (plus the implicit offset) to a register to preinc anyway.
   7650   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
   7651     return false;
   7652 
   7653   // Check #2.
   7654   if (!isLoad) {
   7655     SDValue Val = cast<StoreSDNode>(N)->getValue();
   7656     if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
   7657       return false;
   7658   }
   7659 
   7660   // If the offset is a constant, there may be other adds of constants that
   7661   // can be folded with this one. We should do this to avoid having to keep
   7662   // a copy of the original base pointer.
   7663   SmallVector<SDNode *, 16> OtherUses;
   7664   if (isa<ConstantSDNode>(Offset))
   7665     for (SDNode *Use : BasePtr.getNode()->uses()) {
   7666       if (Use == Ptr.getNode())
   7667         continue;
   7668 
   7669       if (Use->isPredecessorOf(N))
   7670         continue;
   7671 
   7672       if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) {
   7673         OtherUses.clear();
   7674         break;
   7675       }
   7676 
   7677       SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1);
   7678       if (Op1.getNode() == BasePtr.getNode())
   7679         std::swap(Op0, Op1);
   7680       assert(Op0.getNode() == BasePtr.getNode() &&
   7681              "Use of ADD/SUB but not an operand");
   7682 
   7683       if (!isa<ConstantSDNode>(Op1)) {
   7684         OtherUses.clear();
   7685         break;
   7686       }
   7687 
   7688       // FIXME: In some cases, we can be smarter about this.
   7689       if (Op1.getValueType() != Offset.getValueType()) {
   7690         OtherUses.clear();
   7691         break;
   7692       }
   7693 
   7694       OtherUses.push_back(Use);
   7695     }
   7696 
   7697   if (Swapped)
   7698     std::swap(BasePtr, Offset);
   7699 
   7700   // Now check for #3 and #4.
   7701   bool RealUse = false;
   7702 
   7703   // Caches for hasPredecessorHelper
   7704   SmallPtrSet<const SDNode *, 32> Visited;
   7705   SmallVector<const SDNode *, 16> Worklist;
   7706 
   7707   for (SDNode *Use : Ptr.getNode()->uses()) {
   7708     if (Use == N)
   7709       continue;
   7710     if (N->hasPredecessorHelper(Use, Visited, Worklist))
   7711       return false;
   7712 
    7713     // If Ptr may be folded into the addressing mode of another use, then it's
   7714     // not profitable to do this transformation.
   7715     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
   7716       RealUse = true;
   7717   }
   7718 
   7719   if (!RealUse)
   7720     return false;
   7721 
   7722   SDValue Result;
   7723   if (isLoad)
   7724     Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
   7725                                 BasePtr, Offset, AM);
   7726   else
   7727     Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
   7728                                  BasePtr, Offset, AM);
   7729   ++PreIndexedNodes;
   7730   ++NodesCombined;
   7731   DEBUG(dbgs() << "\nReplacing.4 ";
   7732         N->dump(&DAG);
   7733         dbgs() << "\nWith: ";
   7734         Result.getNode()->dump(&DAG);
   7735         dbgs() << '\n');
   7736   WorkListRemover DeadNodes(*this);
   7737   if (isLoad) {
   7738     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
   7739     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
   7740   } else {
   7741     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
   7742   }
   7743 
   7744   // Finally, since the node is now dead, remove it from the graph.
   7745   DAG.DeleteNode(N);
   7746 
   7747   if (Swapped)
   7748     std::swap(BasePtr, Offset);
   7749 
   7750   // Replace other uses of BasePtr that can be updated to use Ptr
   7751   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
   7752     unsigned OffsetIdx = 1;
   7753     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
   7754       OffsetIdx = 0;
   7755     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
   7756            BasePtr.getNode() && "Expected BasePtr operand");
   7757 
   7758     // We need to replace ptr0 in the following expression:
   7759     //   x0 * offset0 + y0 * ptr0 = t0
   7760     // knowing that
   7761     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
   7762     //
   7763     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    7764     // indexed load/store and the expression that needs to be rewritten.
   7765     //
   7766     // Therefore, we have:
    7767     //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
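             //
             // A worked instance (illustrative values, not from the original
             // comment): if both this use and the indexed access are ADDs and the
             // address was pre-incremented without swapping, then
             // x0 == x1 == y0 == y1 == 1, so t0 = (offset0 - offset1) + t1.
             // E.g. with offset0 == 12 and offset1 == 4, t0 becomes t1 + 8.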
   7768 
   7769     ConstantSDNode *CN =
   7770       cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
   7771     int X0, X1, Y0, Y1;
   7772     APInt Offset0 = CN->getAPIntValue();
   7773     APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
   7774 
   7775     X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
   7776     Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
   7777     X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
   7778     Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
   7779 
   7780     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
   7781 
   7782     APInt CNV = Offset0;
   7783     if (X0 < 0) CNV = -CNV;
   7784     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
   7785     else CNV = CNV - Offset1;
   7786 
   7787     // We can now generate the new expression.
   7788     SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0));
   7789     SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
   7790 
   7791     SDValue NewUse = DAG.getNode(Opcode,
   7792                                  SDLoc(OtherUses[i]),
   7793                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
   7794     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
   7795     removeFromWorkList(OtherUses[i]);
   7796     DAG.DeleteNode(OtherUses[i]);
   7797   }
   7798 
   7799   // Replace the uses of Ptr with uses of the updated base value.
   7800   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
   7801   removeFromWorkList(Ptr.getNode());
   7802   DAG.DeleteNode(Ptr.getNode());
   7803 
   7804   return true;
   7805 }
   7806 
    7807 /// CombineToPostIndexedLoadStore - Try to combine a load / store with an
    7808 /// add / sub of the base pointer node into a post-indexed load / store.
    7809 /// The transformation effectively folds the add / subtract into the new
    7810 /// indexed load / store, and all of its uses are redirected to the
    7811 /// new load / store.
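         /// A minimal sketch of the shape of the transformation (illustrative
         /// pseudo-DAG, not taken from the original comment):
         ///   v  = load x
         ///   t0 = add x, 4        ; separate use of the base pointer
         /// becomes, on targets with post-indexed loads,
         ///   v, t0' = post_inc_load x, 4
         /// with the uses of t0 redirected to the written-back pointer t0'.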
   7812 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
   7813   if (Level < AfterLegalizeDAG)
   7814     return false;
   7815 
   7816   bool isLoad = true;
   7817   SDValue Ptr;
   7818   EVT VT;
   7819   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
   7820     if (LD->isIndexed())
   7821       return false;
   7822     VT = LD->getMemoryVT();
   7823     if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
   7824         !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
   7825       return false;
   7826     Ptr = LD->getBasePtr();
   7827   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
   7828     if (ST->isIndexed())
   7829       return false;
   7830     VT = ST->getMemoryVT();
   7831     if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
   7832         !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
   7833       return false;
   7834     Ptr = ST->getBasePtr();
   7835     isLoad = false;
   7836   } else {
   7837     return false;
   7838   }
   7839 
   7840   if (Ptr.getNode()->hasOneUse())
   7841     return false;
   7842 
   7843   for (SDNode *Op : Ptr.getNode()->uses()) {
   7844     if (Op == N ||
   7845         (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
   7846       continue;
   7847 
   7848     SDValue BasePtr;
   7849     SDValue Offset;
   7850     ISD::MemIndexedMode AM = ISD::UNINDEXED;
   7851     if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
    7852       // Don't create an indexed load / store with zero offset.
   7853       if (isa<ConstantSDNode>(Offset) &&
   7854           cast<ConstantSDNode>(Offset)->isNullValue())
   7855         continue;
   7856 
    7857       // Try turning it into a post-indexed load / store except when:
    7858       // 1) All uses are load / store ops that use it as base ptr (and
    7859       //    it may be folded into the addressing mode).
   7860       // 2) Op must be independent of N, i.e. Op is neither a predecessor
   7861       //    nor a successor of N. Otherwise, if Op is folded that would
   7862       //    create a cycle.
   7863 
   7864       if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
   7865         continue;
   7866 
   7867       // Check for #1.
   7868       bool TryNext = false;
   7869       for (SDNode *Use : BasePtr.getNode()->uses()) {
   7870         if (Use == Ptr.getNode())
   7871           continue;
   7872 
   7873         // If all the uses are load / store addresses, then don't do the
   7874         // transformation.
   7875         if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
   7876           bool RealUse = false;
   7877           for (SDNode *UseUse : Use->uses()) {
   7878             if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
   7879               RealUse = true;
   7880           }
   7881 
   7882           if (!RealUse) {
   7883             TryNext = true;
   7884             break;
   7885           }
   7886         }
   7887       }
   7888 
   7889       if (TryNext)
   7890         continue;
   7891 
   7892       // Check for #2
   7893       if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
   7894         SDValue Result = isLoad
   7895           ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
   7896                                BasePtr, Offset, AM)
   7897           : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
   7898                                 BasePtr, Offset, AM);
   7899         ++PostIndexedNodes;
   7900         ++NodesCombined;
   7901         DEBUG(dbgs() << "\nReplacing.5 ";
   7902               N->dump(&DAG);
   7903               dbgs() << "\nWith: ";
   7904               Result.getNode()->dump(&DAG);
   7905               dbgs() << '\n');
   7906         WorkListRemover DeadNodes(*this);
   7907         if (isLoad) {
   7908           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
   7909           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
   7910         } else {
   7911           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
   7912         }
   7913 
   7914         // Finally, since the node is now dead, remove it from the graph.
   7915         DAG.DeleteNode(N);
   7916 
   7917         // Replace the uses of Use with uses of the updated base value.
   7918         DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
   7919                                       Result.getValue(isLoad ? 1 : 0));
   7920         removeFromWorkList(Op);
   7921         DAG.DeleteNode(Op);
   7922         return true;
   7923       }
   7924     }
   7925   }
   7926 
   7927   return false;
   7928 }
   7929 
   7930 SDValue DAGCombiner::visitLOAD(SDNode *N) {
   7931   LoadSDNode *LD  = cast<LoadSDNode>(N);
   7932   SDValue Chain = LD->getChain();
   7933   SDValue Ptr   = LD->getBasePtr();
   7934 
   7935   // If load is not volatile and there are no uses of the loaded value (and
   7936   // the updated indexed value in case of indexed loads), change uses of the
   7937   // chain value into uses of the chain input (i.e. delete the dead load).
   7938   if (!LD->isVolatile()) {
   7939     if (N->getValueType(1) == MVT::Other) {
   7940       // Unindexed loads.
   7941       if (!N->hasAnyUseOfValue(0)) {
    7942       // It's not safe to use the two-value CombineTo variant here, e.g.:
   7943         // v1, chain2 = load chain1, loc
   7944         // v2, chain3 = load chain2, loc
   7945         // v3         = add v2, c
   7946         // Now we replace use of chain2 with chain1.  This makes the second load
   7947         // isomorphic to the one we are deleting, and thus makes this load live.
   7948         DEBUG(dbgs() << "\nReplacing.6 ";
   7949               N->dump(&DAG);
   7950               dbgs() << "\nWith chain: ";
   7951               Chain.getNode()->dump(&DAG);
   7952               dbgs() << "\n");
   7953         WorkListRemover DeadNodes(*this);
   7954         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
   7955 
   7956         if (N->use_empty()) {
   7957           removeFromWorkList(N);
   7958           DAG.DeleteNode(N);
   7959         }
   7960 
   7961         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   7962       }
   7963     } else {
   7964       // Indexed loads.
   7965       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
   7966       if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
   7967         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
   7968         DEBUG(dbgs() << "\nReplacing.7 ";
   7969               N->dump(&DAG);
   7970               dbgs() << "\nWith: ";
   7971               Undef.getNode()->dump(&DAG);
   7972               dbgs() << " and 2 other values\n");
   7973         WorkListRemover DeadNodes(*this);
   7974         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
   7975         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
   7976                                       DAG.getUNDEF(N->getValueType(1)));
   7977         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
   7978         removeFromWorkList(N);
   7979         DAG.DeleteNode(N);
   7980         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   7981       }
   7982     }
   7983   }
   7984 
   7985   // If this load is directly stored, replace the load value with the stored
   7986   // value.
   7987   // TODO: Handle store large -> read small portion.
   7988   // TODO: Handle TRUNCSTORE/LOADEXT
   7989   if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
   7990     if (ISD::isNON_TRUNCStore(Chain.getNode())) {
   7991       StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
   7992       if (PrevST->getBasePtr() == Ptr &&
   7993           PrevST->getValue().getValueType() == N->getValueType(0))
   7994       return CombineTo(N, Chain.getOperand(1), Chain);
   7995     }
   7996   }
   7997 
   7998   // Try to infer better alignment information than the load already has.
   7999   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
   8000     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
   8001       if (Align > LD->getMemOperand()->getBaseAlignment()) {
   8002         SDValue NewLoad =
   8003                DAG.getExtLoad(LD->getExtensionType(), SDLoc(N),
   8004                               LD->getValueType(0),
   8005                               Chain, Ptr, LD->getPointerInfo(),
   8006                               LD->getMemoryVT(),
   8007                               LD->isVolatile(), LD->isNonTemporal(), Align,
   8008                               LD->getTBAAInfo());
   8009         return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
   8010       }
   8011     }
   8012   }
   8013 
   8014   bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
   8015     TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
   8016 #ifndef NDEBUG
   8017   if (CombinerAAOnlyFunc.getNumOccurrences() &&
   8018       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
   8019     UseAA = false;
   8020 #endif
   8021   if (UseAA && LD->isUnindexed()) {
   8022     // Walk up chain skipping non-aliasing memory nodes.
   8023     SDValue BetterChain = FindBetterChain(N, Chain);
   8024 
   8025     // If there is a better chain.
   8026     if (Chain != BetterChain) {
   8027       SDValue ReplLoad;
   8028 
    8029       // Replace the chain to avoid the dependency.
   8030       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
   8031         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
   8032                                BetterChain, Ptr, LD->getMemOperand());
   8033       } else {
   8034         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
   8035                                   LD->getValueType(0),
   8036                                   BetterChain, Ptr, LD->getMemoryVT(),
   8037                                   LD->getMemOperand());
   8038       }
   8039 
   8040       // Create token factor to keep old chain connected.
   8041       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
   8042                                   MVT::Other, Chain, ReplLoad.getValue(1));
   8043 
   8044       // Make sure the new and old chains are cleaned up.
   8045       AddToWorkList(Token.getNode());
   8046 
   8047       // Replace uses with load result and token factor. Don't add users
   8048       // to work list.
   8049       return CombineTo(N, ReplLoad.getValue(0), Token, false);
   8050     }
   8051   }
   8052 
   8053   // Try transforming N to an indexed load.
   8054   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
   8055     return SDValue(N, 0);
   8056 
    8057   // Try to slice up N into smaller, more direct loads if the slices are mapped to
   8058   // different register banks or pairing can take place.
   8059   if (SliceUpLoad(N))
   8060     return SDValue(N, 0);
   8061 
   8062   return SDValue();
   8063 }
   8064 
   8065 namespace {
    8066 /// \brief Helper structure used to slice a load into smaller loads.
   8067 /// Basically a slice is obtained from the following sequence:
   8068 /// Origin = load Ty1, Base
   8069 /// Shift = srl Ty1 Origin, CstTy Amount
   8070 /// Inst = trunc Shift to Ty2
   8071 ///
    8072 /// Then, it will be rewritten into:
   8073 /// Slice = load SliceTy, Base + SliceOffset
   8074 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
   8075 ///
   8076 /// SliceTy is deduced from the number of bits that are actually used to
   8077 /// build Inst.
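         ///
         /// A concrete instance (illustrative, little-endian, assuming legal types):
         ///   Origin = load i32, Base
         ///   Shift  = srl i32 Origin, 16
         ///   Inst   = trunc Shift to i16
         /// can be rewritten as
         ///   Slice  = load i16, Base + 2
         /// since only bits [31:16] of the original value are used.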
   8078 struct LoadedSlice {
   8079   /// \brief Helper structure used to compute the cost of a slice.
   8080   struct Cost {
   8081     /// Are we optimizing for code size.
   8082     bool ForCodeSize;
    8083     /// Various costs.
   8084     unsigned Loads;
   8085     unsigned Truncates;
   8086     unsigned CrossRegisterBanksCopies;
   8087     unsigned ZExts;
   8088     unsigned Shift;
   8089 
   8090     Cost(bool ForCodeSize = false)
   8091         : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
   8092           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
   8093 
   8094     /// \brief Get the cost of one isolated slice.
   8095     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
   8096         : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
   8097           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
   8098       EVT TruncType = LS.Inst->getValueType(0);
   8099       EVT LoadedType = LS.getLoadedType();
   8100       if (TruncType != LoadedType &&
   8101           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
   8102         ZExts = 1;
   8103     }
   8104 
   8105     /// \brief Account for slicing gain in the current cost.
    8106     /// Slicing provides a few gains like removing a shift or a
    8107     /// truncate. This method allows growing the cost of the original
   8108     /// load with the gain from this slice.
   8109     void addSliceGain(const LoadedSlice &LS) {
   8110       // Each slice saves a truncate.
   8111       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
   8112       if (!TLI.isTruncateFree(LS.Inst->getValueType(0),
   8113                               LS.Inst->getOperand(0).getValueType()))
   8114         ++Truncates;
   8115       // If there is a shift amount, this slice gets rid of it.
   8116       if (LS.Shift)
   8117         ++Shift;
   8118       // If this slice can merge a cross register bank copy, account for it.
   8119       if (LS.canMergeExpensiveCrossRegisterBankCopy())
   8120         ++CrossRegisterBanksCopies;
   8121     }
   8122 
   8123     Cost &operator+=(const Cost &RHS) {
   8124       Loads += RHS.Loads;
   8125       Truncates += RHS.Truncates;
   8126       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
   8127       ZExts += RHS.ZExts;
   8128       Shift += RHS.Shift;
   8129       return *this;
   8130     }
   8131 
   8132     bool operator==(const Cost &RHS) const {
   8133       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
   8134              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
   8135              ZExts == RHS.ZExts && Shift == RHS.Shift;
   8136     }
   8137 
   8138     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
   8139 
   8140     bool operator<(const Cost &RHS) const {
    8141       // Assume cross-register-bank copies are as expensive as loads.
   8142       // FIXME: Do we want some more target hooks?
   8143       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
   8144       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
   8145       // Unless we are optimizing for code size, consider the
   8146       // expensive operation first.
   8147       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
   8148         return ExpensiveOpsLHS < ExpensiveOpsRHS;
   8149       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
   8150              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
   8151     }
   8152 
   8153     bool operator>(const Cost &RHS) const { return RHS < *this; }
   8154 
   8155     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
   8156 
   8157     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
   8158   };
    8159   // The last instruction that represents the slice. This should be a
   8160   // truncate instruction.
   8161   SDNode *Inst;
   8162   // The original load instruction.
   8163   LoadSDNode *Origin;
   8164   // The right shift amount in bits from the original load.
   8165   unsigned Shift;
    8166   // The DAG from which Origin came.
   8167   // This is used to get some contextual information about legal types, etc.
   8168   SelectionDAG *DAG;
   8169 
   8170   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
   8171               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
   8172       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
   8173 
   8174   LoadedSlice(const LoadedSlice &LS)
   8175       : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {}
   8176 
   8177   /// \brief Get the bits used in a chunk of bits \p BitWidth large.
    8178   /// \return Result is \p BitWidth bits wide, with used bits set to 1 and
    8179   ///         unused bits set to 0.
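           /// For instance (illustrative): if Origin is an i32 load, Shift is 8,
           /// and Inst produces an i8, the result is an i32 APInt equal to
           /// 0x0000FF00 (bits [15:8] set).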
   8180   APInt getUsedBits() const {
   8181     // Reproduce the trunc(lshr) sequence:
   8182     // - Start from the truncated value.
   8183     // - Zero extend to the desired bit width.
   8184     // - Shift left.
   8185     assert(Origin && "No original load to compare against.");
   8186     unsigned BitWidth = Origin->getValueSizeInBits(0);
   8187     assert(Inst && "This slice is not bound to an instruction");
   8188     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
   8189            "Extracted slice is bigger than the whole type!");
   8190     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
   8191     UsedBits.setAllBits();
   8192     UsedBits = UsedBits.zext(BitWidth);
   8193     UsedBits <<= Shift;
   8194     return UsedBits;
   8195   }
   8196 
   8197   /// \brief Get the size of the slice to be loaded in bytes.
   8198   unsigned getLoadedSize() const {
   8199     unsigned SliceSize = getUsedBits().countPopulation();
   8200     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
   8201     return SliceSize / 8;
   8202   }
   8203 
   8204   /// \brief Get the type that will be loaded for this slice.
   8205   /// Note: This may not be the final type for the slice.
   8206   EVT getLoadedType() const {
   8207     assert(DAG && "Missing context");
   8208     LLVMContext &Ctxt = *DAG->getContext();
   8209     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
   8210   }
   8211 
   8212   /// \brief Get the alignment of the load used for this slice.
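           /// For example (illustrative numbers): an 8-byte-aligned origin load
           /// with a slice offset of 4 yields MinAlign(8, 8 + 4) == 4, i.e. the
           /// sliced load is only 4-byte aligned.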
   8213   unsigned getAlignment() const {
   8214     unsigned Alignment = Origin->getAlignment();
   8215     unsigned Offset = getOffsetFromBase();
   8216     if (Offset != 0)
   8217       Alignment = MinAlign(Alignment, Alignment + Offset);
   8218     return Alignment;
   8219   }
   8220 
   8221   /// \brief Check if this slice can be rewritten with legal operations.
   8222   bool isLegal() const {
   8223     // An invalid slice is not legal.
   8224     if (!Origin || !Inst || !DAG)
   8225       return false;
   8226 
    8227     // Offsets are for indexed loads only; we do not handle that.
   8228     if (Origin->getOffset().getOpcode() != ISD::UNDEF)
   8229       return false;
   8230 
   8231     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
   8232 
   8233     // Check that the type is legal.
   8234     EVT SliceType = getLoadedType();
   8235     if (!TLI.isTypeLegal(SliceType))
   8236       return false;
   8237 
   8238     // Check that the load is legal for this type.
   8239     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
   8240       return false;
   8241 
   8242     // Check that the offset can be computed.
   8243     // 1. Check its type.
   8244     EVT PtrType = Origin->getBasePtr().getValueType();
   8245     if (PtrType == MVT::Untyped || PtrType.isExtended())
   8246       return false;
   8247 
   8248     // 2. Check that it fits in the immediate.
   8249     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
   8250       return false;
   8251 
   8252     // 3. Check that the computation is legal.
   8253     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
   8254       return false;
   8255 
   8256     // Check that the zext is legal if it needs one.
   8257     EVT TruncateType = Inst->getValueType(0);
   8258     if (TruncateType != SliceType &&
   8259         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
   8260       return false;
   8261 
   8262     return true;
   8263   }
   8264 
   8265   /// \brief Get the offset in bytes of this slice in the original chunk of
   8266   /// bits.
   8267   /// \pre DAG != nullptr.
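           /// For example (illustrative): for an i32 origin, Shift == 8, and a
           /// 1-byte slice, the offset is 1 on little-endian targets and
           /// 4 - 1 - 1 == 2 on big-endian targets.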
   8268   uint64_t getOffsetFromBase() const {
   8269     assert(DAG && "Missing context.");
   8270     bool IsBigEndian =
   8271         DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian();
   8272     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
   8273     uint64_t Offset = Shift / 8;
   8274     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
   8275     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
   8276            "The size of the original loaded type is not a multiple of a"
   8277            " byte.");
   8278     // If Offset is bigger than TySizeInBytes, it means we are loading all
    8279     // zeros. This should have been optimized away earlier in the process.
   8280     assert(TySizeInBytes > Offset &&
   8281            "Invalid shift amount for given loaded size");
   8282     if (IsBigEndian)
   8283       Offset = TySizeInBytes - Offset - getLoadedSize();
   8284     return Offset;
   8285   }
   8286 
   8287   /// \brief Generate the sequence of instructions to load the slice
   8288   /// represented by this object and redirect the uses of this slice to
   8289   /// this new sequence of instructions.
   8290   /// \pre this->Inst && this->Origin are valid Instructions and this
   8291   /// object passed the legal check: LoadedSlice::isLegal returned true.
   8292   /// \return The last instruction of the sequence used to load the slice.
   8293   SDValue loadSlice() const {
   8294     assert(Inst && Origin && "Unable to replace a non-existing slice.");
   8295     const SDValue &OldBaseAddr = Origin->getBasePtr();
   8296     SDValue BaseAddr = OldBaseAddr;
    8297     // Get the offset in that chunk of bytes w.r.t. the endianness.
   8298     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
   8299     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
   8300     if (Offset) {
   8301       // BaseAddr = BaseAddr + Offset.
   8302       EVT ArithType = BaseAddr.getValueType();
   8303       BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr,
   8304                               DAG->getConstant(Offset, ArithType));
   8305     }
   8306 
   8307     // Create the type of the loaded slice according to its size.
   8308     EVT SliceType = getLoadedType();
   8309 
   8310     // Create the load for the slice.
   8311     SDValue LastInst = DAG->getLoad(
   8312         SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
   8313         Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(),
   8314         Origin->isNonTemporal(), Origin->isInvariant(), getAlignment());
   8315     // If the final type is not the same as the loaded type, this means that
   8316     // we have to pad with zero. Create a zero extend for that.
   8317     EVT FinalType = Inst->getValueType(0);
   8318     if (SliceType != FinalType)
   8319       LastInst =
   8320           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
   8321     return LastInst;
   8322   }
   8323 
   8324   /// \brief Check if this slice can be merged with an expensive cross register
   8325   /// bank copy. E.g.,
   8326   /// i = load i32
   8327   /// f = bitcast i32 i to float
   8328   bool canMergeExpensiveCrossRegisterBankCopy() const {
   8329     if (!Inst || !Inst->hasOneUse())
   8330       return false;
   8331     SDNode *Use = *Inst->use_begin();
   8332     if (Use->getOpcode() != ISD::BITCAST)
   8333       return false;
   8334     assert(DAG && "Missing context");
   8335     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
   8336     EVT ResVT = Use->getValueType(0);
   8337     const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
   8338     const TargetRegisterClass *ArgRC =
   8339         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
   8340     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
   8341       return false;
   8342 
   8343     // At this point, we know that we perform a cross-register-bank copy.
   8344     // Check if it is expensive.
   8345     const TargetRegisterInfo *TRI = TLI.getTargetMachine().getRegisterInfo();
   8346     // Assume bitcasts are cheap, unless both register classes do not
    8347     // explicitly share a common subclass.
   8348     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
   8349       return false;
   8350 
   8351     // Check if it will be merged with the load.
   8352     // 1. Check the alignment constraint.
   8353     unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment(
   8354         ResVT.getTypeForEVT(*DAG->getContext()));
   8355 
   8356     if (RequiredAlignment > getAlignment())
   8357       return false;
   8358 
   8359     // 2. Check that the load is a legal operation for that type.
   8360     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
   8361       return false;
   8362 
   8363     // 3. Check that we do not have a zext in the way.
   8364     if (Inst->getValueType(0) != getLoadedType())
   8365       return false;
   8366 
   8367     return true;
   8368   }
   8369 };
   8370 }
   8371 
   8372 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
   8373 /// \p UsedBits looks like 0..0 1..1 0..0.
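         /// For example (illustrative): 0x00FF0000 and 0x000000FF are dense,
         /// while 0x00FF00FF is not (there is a hole between the two byte runs).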
   8374 static bool areUsedBitsDense(const APInt &UsedBits) {
   8375   // If all the bits are one, this is dense!
   8376   if (UsedBits.isAllOnesValue())
   8377     return true;
   8378 
   8379   // Get rid of the unused bits on the right.
   8380   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
   8381   // Get rid of the unused bits on the left.
   8382   if (NarrowedUsedBits.countLeadingZeros())
   8383     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
   8384   // Check that the chunk of bits is completely used.
   8385   return NarrowedUsedBits.isAllOnesValue();
   8386 }
   8387 
   8388 /// \brief Check whether or not \p First and \p Second are next to each other
   8389 /// in memory. This means that there is no hole between the bits loaded
   8390 /// by \p First and the bits loaded by \p Second.
   8391 static bool areSlicesNextToEachOther(const LoadedSlice &First,
   8392                                      const LoadedSlice &Second) {
   8393   assert(First.Origin == Second.Origin && First.Origin &&
   8394          "Unable to match different memory origins.");
   8395   APInt UsedBits = First.getUsedBits();
   8396   assert((UsedBits & Second.getUsedBits()) == 0 &&
   8397          "Slices are not supposed to overlap.");
   8398   UsedBits |= Second.getUsedBits();
   8399   return areUsedBitsDense(UsedBits);
   8400 }
   8401 
   8402 /// \brief Adjust the \p GlobalLSCost according to the target
    8403 /// pairing capabilities and the layout of the slices.
   8404 /// \pre \p GlobalLSCost should account for at least as many loads as
    8405 /// there are in the slices in \p LoadedSlices.
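         /// For instance (illustrative, assuming the target reports paired loads
         /// for the slice type via TLI.hasPairedLoad): two adjacent i32 slices of
         /// the same i64 origin can be emitted as one paired load, so one load is
         /// subtracted from \p GlobalLSCost.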
   8406 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
   8407                                  LoadedSlice::Cost &GlobalLSCost) {
   8408   unsigned NumberOfSlices = LoadedSlices.size();
    8409   // If there are fewer than 2 elements, no pairing is possible.
   8410   if (NumberOfSlices < 2)
   8411     return;
   8412 
   8413   // Sort the slices so that elements that are likely to be next to each
   8414   // other in memory are next to each other in the list.
   8415   std::sort(LoadedSlices.begin(), LoadedSlices.end(),
   8416             [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
   8417     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
   8418     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
   8419   });
   8420   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
    8421   // First (resp. Second) is the first (resp. second) potential candidate
   8422   // to be placed in a paired load.
   8423   const LoadedSlice *First = nullptr;
   8424   const LoadedSlice *Second = nullptr;
   8425   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
   8426                 // Set the beginning of the pair.
   8427                                                            First = Second) {
   8428 
   8429     Second = &LoadedSlices[CurrSlice];
   8430 
   8431     // If First is NULL, it means we start a new pair.
   8432     // Get to the next slice.
   8433     if (!First)
   8434       continue;
   8435 
   8436     EVT LoadedType = First->getLoadedType();
   8437 
   8438     // If the types of the slices are different, we cannot pair them.
   8439     if (LoadedType != Second->getLoadedType())
   8440       continue;
   8441 
   8442     // Check if the target supplies paired loads for this type.
   8443     unsigned RequiredAlignment = 0;
   8444     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
    8445       // Move to the next pair; this type is hopeless.
   8446       Second = nullptr;
   8447       continue;
   8448     }
   8449     // Check if we meet the alignment requirement.
   8450     if (RequiredAlignment > First->getAlignment())
   8451       continue;
   8452 
   8453     // Check that both loads are next to each other in memory.
   8454     if (!areSlicesNextToEachOther(*First, *Second))
   8455       continue;
   8456 
   8457     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
   8458     --GlobalLSCost.Loads;
   8459     // Move to the next pair.
   8460     Second = nullptr;
   8461   }
   8462 }
   8463 
   8464 /// \brief Check the profitability of all involved LoadedSlice.
    8465 /// Currently, it is considered profitable if there are exactly two
   8466 /// involved slices (1) which are (2) next to each other in memory, and
   8467 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
   8468 ///
   8469 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
   8470 /// the elements themselves.
   8471 ///
   8472 /// FIXME: When the cost model will be mature enough, we can relax
   8473 /// constraints (1) and (2).
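/// For instance, splitting one i32 load into two adjacent i16 slices is
/// profitable when the cost of the two narrow loads (after any paired-load
/// discount) is lower than the cost of the original load plus the truncates
/// and shifts that slicing makes unnecessary (illustrative example).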
   8474 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
   8475                                 const APInt &UsedBits, bool ForCodeSize) {
   8476   unsigned NumberOfSlices = LoadedSlices.size();
   8477   if (StressLoadSlicing)
   8478     return NumberOfSlices > 1;
   8479 
   8480   // Check (1).
   8481   if (NumberOfSlices != 2)
   8482     return false;
   8483 
   8484   // Check (2).
   8485   if (!areUsedBitsDense(UsedBits))
   8486     return false;
   8487 
   8488   // Check (3).
   8489   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
   8490   // The original code has one big load.
   8491   OrigCost.Loads = 1;
   8492   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
   8493     const LoadedSlice &LS = LoadedSlices[CurrSlice];
   8494     // Accumulate the cost of all the slices.
   8495     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
   8496     GlobalSlicingCost += SliceCost;
   8497 
   8498     // Account as cost in the original configuration the gain obtained
   8499     // with the current slices.
   8500     OrigCost.addSliceGain(LS);
   8501   }
   8502 
   8503   // If the target supports paired load, adjust the cost accordingly.
   8504   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
   8505   return OrigCost > GlobalSlicingCost;
   8506 }
   8507 
   8508 /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
8509 /// operations, split it into the various pieces being extracted.
   8510 ///
   8511 /// This sort of thing is introduced by SROA.
   8512 /// This slicing takes care not to insert overlapping loads.
   8513 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
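/// For example, an i32 load whose only uses are (trunc ... to i16) and
/// (trunc (srl ..., 16) to i16) can be rewritten as two independent i16
/// loads at byte offsets 0 and 2 on a little-endian target (illustrative
/// example).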
   8514 bool DAGCombiner::SliceUpLoad(SDNode *N) {
   8515   if (Level < AfterLegalizeDAG)
   8516     return false;
   8517 
   8518   LoadSDNode *LD = cast<LoadSDNode>(N);
   8519   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
   8520       !LD->getValueType(0).isInteger())
   8521     return false;
   8522 
   8523   // Keep track of already used bits to detect overlapping values.
   8524   // In that case, we will just abort the transformation.
   8525   APInt UsedBits(LD->getValueSizeInBits(0), 0);
   8526 
   8527   SmallVector<LoadedSlice, 4> LoadedSlices;
   8528 
   8529   // Check if this load is used as several smaller chunks of bits.
   8530   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
   8531   // of computation for each trunc.
   8532   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
   8533        UI != UIEnd; ++UI) {
   8534     // Skip the uses of the chain.
   8535     if (UI.getUse().getResNo() != 0)
   8536       continue;
   8537 
   8538     SDNode *User = *UI;
   8539     unsigned Shift = 0;
   8540 
   8541     // Check if this is a trunc(lshr).
   8542     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
   8543         isa<ConstantSDNode>(User->getOperand(1))) {
   8544       Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue();
   8545       User = *User->use_begin();
   8546     }
   8547 
8548     // At this point, User is a TRUNCATE iff we encountered trunc or
8549     // trunc(lshr).
   8550     if (User->getOpcode() != ISD::TRUNCATE)
   8551       return false;
   8552 
8553     // The width of the type must be a power of 2 and at least 8 bits.
8554     // Otherwise the load cannot be represented in LLVM IR.
8555     // Moreover, if the shift amount is not a multiple of 8 bits, the slice
8556     // would straddle byte boundaries. We do not support that.
   8557     unsigned Width = User->getValueSizeInBits(0);
   8558     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
8559       return false;
   8560 
   8561     // Build the slice for this chain of computations.
   8562     LoadedSlice LS(User, LD, Shift, &DAG);
   8563     APInt CurrentUsedBits = LS.getUsedBits();
   8564 
   8565     // Check if this slice overlaps with another.
   8566     if ((CurrentUsedBits & UsedBits) != 0)
   8567       return false;
   8568     // Update the bits used globally.
   8569     UsedBits |= CurrentUsedBits;
   8570 
   8571     // Check if the new slice would be legal.
   8572     if (!LS.isLegal())
   8573       return false;
   8574 
   8575     // Record the slice.
   8576     LoadedSlices.push_back(LS);
   8577   }
   8578 
   8579   // Abort slicing if it does not seem to be profitable.
   8580   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
   8581     return false;
   8582 
   8583   ++SlicedLoads;
   8584 
   8585   // Rewrite each chain to use an independent load.
   8586   // By construction, each chain can be represented by a unique load.
   8587 
8588   // Prepare the arguments for the new token factor for all the slices.
   8589   SmallVector<SDValue, 8> ArgChains;
   8590   for (SmallVectorImpl<LoadedSlice>::const_iterator
   8591            LSIt = LoadedSlices.begin(),
   8592            LSItEnd = LoadedSlices.end();
   8593        LSIt != LSItEnd; ++LSIt) {
   8594     SDValue SliceInst = LSIt->loadSlice();
   8595     CombineTo(LSIt->Inst, SliceInst, true);
   8596     if (SliceInst.getNode()->getOpcode() != ISD::LOAD)
   8597       SliceInst = SliceInst.getOperand(0);
   8598     assert(SliceInst->getOpcode() == ISD::LOAD &&
   8599            "It takes more than a zext to get to the loaded slice!!");
   8600     ArgChains.push_back(SliceInst.getValue(1));
   8601   }
   8602 
   8603   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
   8604                               ArgChains);
   8605   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
   8606   return true;
   8607 }
   8608 
8609 /// CheckForMaskedLoad - Check to see if V is (and (load ptr), imm), where the
8610 /// load has specific bytes cleared out.  If so, return the byte size
   8611 /// being masked out and the shift amount.
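/// For example, for an i32 value V = (and (load Ptr), 0xFFFFFF00), the low
/// byte of the load is cleared, so this returns {1, 0}: one masked byte at
/// byte offset zero (illustrative example).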
   8612 static std::pair<unsigned, unsigned>
   8613 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
   8614   std::pair<unsigned, unsigned> Result(0, 0);
   8615 
   8616   // Check for the structure we're looking for.
   8617   if (V->getOpcode() != ISD::AND ||
   8618       !isa<ConstantSDNode>(V->getOperand(1)) ||
   8619       !ISD::isNormalLoad(V->getOperand(0).getNode()))
   8620     return Result;
   8621 
   8622   // Check the chain and pointer.
   8623   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
   8624   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
   8625 
   8626   // The store should be chained directly to the load or be an operand of a
   8627   // tokenfactor.
   8628   if (LD == Chain.getNode())
   8629     ; // ok.
   8630   else if (Chain->getOpcode() != ISD::TokenFactor)
   8631     return Result; // Fail.
   8632   else {
   8633     bool isOk = false;
   8634     for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
   8635       if (Chain->getOperand(i).getNode() == LD) {
   8636         isOk = true;
   8637         break;
   8638       }
   8639     if (!isOk) return Result;
   8640   }
   8641 
   8642   // This only handles simple types.
   8643   if (V.getValueType() != MVT::i16 &&
   8644       V.getValueType() != MVT::i32 &&
   8645       V.getValueType() != MVT::i64)
   8646     return Result;
   8647 
   8648   // Check the constant mask.  Invert it so that the bits being masked out are
   8649   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
   8650   // follow the sign bit for uniformity.
   8651   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
   8652   unsigned NotMaskLZ = countLeadingZeros(NotMask);
   8653   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
   8654   unsigned NotMaskTZ = countTrailingZeros(NotMask);
   8655   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
   8656   if (NotMaskLZ == 64) return Result;  // All zero mask.
   8657 
8658   // See if we have a contiguous run of bits.  If so, we have 0*1+0*
   8659   if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
   8660     return Result;
   8661 
   8662   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
   8663   if (V.getValueType() != MVT::i64 && NotMaskLZ)
   8664     NotMaskLZ -= 64-V.getValueSizeInBits();
   8665 
   8666   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
   8667   switch (MaskedBytes) {
   8668   case 1:
   8669   case 2:
   8670   case 4: break;
8671   default: return Result; // All-one mask, or unsupported mask width.
   8672   }
   8673 
8674   // Verify that the masked-out region starts at a multiple of its width so
8675   // that the access is aligned the same as the access width.
   8676   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
   8677 
   8678   Result.first = MaskedBytes;
   8679   Result.second = NotMaskTZ/8;
   8680   return Result;
   8681 }
   8682 
   8683 
   8684 /// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that
   8685 /// provides a value as specified by MaskInfo.  If so, replace the specified
   8686 /// store with a narrower store of truncated IVal.
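/// For example, with MaskInfo = {1, 0}, a store of
/// (or (and (load Ptr), 0xFFFFFF00), IVal) to Ptr can be replaced by a
/// single one-byte store of (trunc IVal) to Ptr, provided the upper bits of
/// IVal are known to be zero (illustrative example).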
   8687 static SDNode *
   8688 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
   8689                                 SDValue IVal, StoreSDNode *St,
   8690                                 DAGCombiner *DC) {
   8691   unsigned NumBytes = MaskInfo.first;
   8692   unsigned ByteShift = MaskInfo.second;
   8693   SelectionDAG &DAG = DC->getDAG();
   8694 
   8695   // Check to see if IVal is all zeros in the part being masked in by the 'or'
   8696   // that uses this.  If not, this is not a replacement.
   8697   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
   8698                                   ByteShift*8, (ByteShift+NumBytes)*8);
   8699   if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
   8700 
   8701   // Check that it is legal on the target to do this.  It is legal if the new
   8702   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
   8703   // legalization.
   8704   MVT VT = MVT::getIntegerVT(NumBytes*8);
   8705   if (!DC->isTypeLegal(VT))
   8706     return nullptr;
   8707 
   8708   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
   8709   // shifted by ByteShift and truncated down to NumBytes.
   8710   if (ByteShift)
   8711     IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal,
   8712                        DAG.getConstant(ByteShift*8,
   8713                                     DC->getShiftAmountTy(IVal.getValueType())));
   8714 
   8715   // Figure out the offset for the store and the alignment of the access.
   8716   unsigned StOffset;
   8717   unsigned NewAlign = St->getAlignment();
   8718 
   8719   if (DAG.getTargetLoweringInfo().isLittleEndian())
   8720     StOffset = ByteShift;
   8721   else
   8722     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
   8723 
   8724   SDValue Ptr = St->getBasePtr();
   8725   if (StOffset) {
   8726     Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(),
   8727                       Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
   8728     NewAlign = MinAlign(NewAlign, StOffset);
   8729   }
   8730 
   8731   // Truncate down to the new size.
   8732   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
   8733 
   8734   ++OpsNarrowed;
   8735   return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
   8736                       St->getPointerInfo().getWithOffset(StOffset),
   8737                       false, false, NewAlign).getNode();
   8738 }
   8739 
   8740 
8741 /// ReduceLoadOpStoreWidth - Look for a sequence of load / op / store where op is
   8742 /// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some
   8743 /// of the loaded bits, try narrowing the load and store if it would end up
   8744 /// being a win for performance or code size.
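/// For example, "store (or (load P), 0x00550000), P" only changes the byte
/// at P+2 on a little-endian target, so it can be narrowed to an i8 load,
/// an i8 "or 0x55", and an i8 store at P+2 (illustrative example).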
   8745 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
   8746   StoreSDNode *ST  = cast<StoreSDNode>(N);
   8747   if (ST->isVolatile())
   8748     return SDValue();
   8749 
   8750   SDValue Chain = ST->getChain();
   8751   SDValue Value = ST->getValue();
   8752   SDValue Ptr   = ST->getBasePtr();
   8753   EVT VT = Value.getValueType();
   8754 
   8755   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
   8756     return SDValue();
   8757 
   8758   unsigned Opc = Value.getOpcode();
   8759 
   8760   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
   8761   // is a byte mask indicating a consecutive number of bytes, check to see if
   8762   // Y is known to provide just those bytes.  If so, we try to replace the
   8763   // load + replace + store sequence with a single (narrower) store, which makes
   8764   // the load dead.
   8765   if (Opc == ISD::OR) {
   8766     std::pair<unsigned, unsigned> MaskedLoad;
   8767     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
   8768     if (MaskedLoad.first)
   8769       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
   8770                                                   Value.getOperand(1), ST,this))
   8771         return SDValue(NewST, 0);
   8772 
   8773     // Or is commutative, so try swapping X and Y.
   8774     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
   8775     if (MaskedLoad.first)
   8776       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
   8777                                                   Value.getOperand(0), ST,this))
   8778         return SDValue(NewST, 0);
   8779   }
   8780 
   8781   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
   8782       Value.getOperand(1).getOpcode() != ISD::Constant)
   8783     return SDValue();
   8784 
   8785   SDValue N0 = Value.getOperand(0);
   8786   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
   8787       Chain == SDValue(N0.getNode(), 1)) {
   8788     LoadSDNode *LD = cast<LoadSDNode>(N0);
   8789     if (LD->getBasePtr() != Ptr ||
   8790         LD->getPointerInfo().getAddrSpace() !=
   8791         ST->getPointerInfo().getAddrSpace())
   8792       return SDValue();
   8793 
8794     // Find the type to narrow the load / op / store to.
   8795     SDValue N1 = Value.getOperand(1);
   8796     unsigned BitWidth = N1.getValueSizeInBits();
   8797     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
   8798     if (Opc == ISD::AND)
   8799       Imm ^= APInt::getAllOnesValue(BitWidth);
   8800     if (Imm == 0 || Imm.isAllOnesValue())
   8801       return SDValue();
   8802     unsigned ShAmt = Imm.countTrailingZeros();
   8803     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
   8804     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
   8805     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
   8806     while (NewBW < BitWidth &&
   8807            !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
   8808              TLI.isNarrowingProfitable(VT, NewVT))) {
   8809       NewBW = NextPowerOf2(NewBW);
   8810       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
   8811     }
   8812     if (NewBW >= BitWidth)
   8813       return SDValue();
   8814 
8815     // If the lowest changed bit does not start on a boundary of the new
8816     // type's bit width, start at the previous boundary.
   8817     if (ShAmt % NewBW)
   8818       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
   8819     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
   8820                                    std::min(BitWidth, ShAmt + NewBW));
   8821     if ((Imm & Mask) == Imm) {
   8822       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
   8823       if (Opc == ISD::AND)
   8824         NewImm ^= APInt::getAllOnesValue(NewBW);
   8825       uint64_t PtrOff = ShAmt / 8;
   8826       // For big endian targets, we need to adjust the offset to the pointer to
   8827       // load the correct bytes.
   8828       if (TLI.isBigEndian())
   8829         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
   8830 
   8831       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
   8832       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
   8833       if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy))
   8834         return SDValue();
   8835 
   8836       SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
   8837                                    Ptr.getValueType(), Ptr,
   8838                                    DAG.getConstant(PtrOff, Ptr.getValueType()));
   8839       SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0),
   8840                                   LD->getChain(), NewPtr,
   8841                                   LD->getPointerInfo().getWithOffset(PtrOff),
   8842                                   LD->isVolatile(), LD->isNonTemporal(),
   8843                                   LD->isInvariant(), NewAlign,
   8844                                   LD->getTBAAInfo());
   8845       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
   8846                                    DAG.getConstant(NewImm, NewVT));
   8847       SDValue NewST = DAG.getStore(Chain, SDLoc(N),
   8848                                    NewVal, NewPtr,
   8849                                    ST->getPointerInfo().getWithOffset(PtrOff),
   8850                                    false, false, NewAlign);
   8851 
   8852       AddToWorkList(NewPtr.getNode());
   8853       AddToWorkList(NewLD.getNode());
   8854       AddToWorkList(NewVal.getNode());
   8855       WorkListRemover DeadNodes(*this);
   8856       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
   8857       ++OpsNarrowed;
   8858       return NewST;
   8859     }
   8860   }
   8861 
   8862   return SDValue();
   8863 }
   8864 
   8865 /// TransformFPLoadStorePair - For a given floating point load / store pair,
   8866 /// if the load value isn't used by any other operations, then consider
   8867 /// transforming the pair to integer load / store operations if the target
   8868 /// deems the transformation profitable.
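/// For example, "store (f32 load P), Q", where the loaded value has no other
/// uses, may be rewritten as "store (i32 load P), Q" when the target reports
/// the integer form as desirable (illustrative example).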
   8869 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
   8870   StoreSDNode *ST  = cast<StoreSDNode>(N);
   8871   SDValue Chain = ST->getChain();
   8872   SDValue Value = ST->getValue();
   8873   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
   8874       Value.hasOneUse() &&
   8875       Chain == SDValue(Value.getNode(), 1)) {
   8876     LoadSDNode *LD = cast<LoadSDNode>(Value);
   8877     EVT VT = LD->getMemoryVT();
   8878     if (!VT.isFloatingPoint() ||
   8879         VT != ST->getMemoryVT() ||
   8880         LD->isNonTemporal() ||
   8881         ST->isNonTemporal() ||
   8882         LD->getPointerInfo().getAddrSpace() != 0 ||
   8883         ST->getPointerInfo().getAddrSpace() != 0)
   8884       return SDValue();
   8885 
   8886     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
   8887     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
   8888         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
   8889         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
   8890         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
   8891       return SDValue();
   8892 
   8893     unsigned LDAlign = LD->getAlignment();
   8894     unsigned STAlign = ST->getAlignment();
   8895     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
   8896     unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy);
   8897     if (LDAlign < ABIAlign || STAlign < ABIAlign)
   8898       return SDValue();
   8899 
   8900     SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value),
   8901                                 LD->getChain(), LD->getBasePtr(),
   8902                                 LD->getPointerInfo(),
   8903                                 false, false, false, LDAlign);
   8904 
   8905     SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N),
   8906                                  NewLD, ST->getBasePtr(),
   8907                                  ST->getPointerInfo(),
   8908                                  false, false, STAlign);
   8909 
   8910     AddToWorkList(NewLD.getNode());
   8911     AddToWorkList(NewST.getNode());
   8912     WorkListRemover DeadNodes(*this);
   8913     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
   8914     ++LdStFP2Int;
   8915     return NewST;
   8916   }
   8917 
   8918   return SDValue();
   8919 }
   8920 
   8921 /// Helper struct to parse and store a memory address as base + index + offset.
   8922 /// We ignore sign extensions when it is safe to do so.
   8923 /// The following two expressions are not equivalent. To differentiate we need
   8924 /// to store whether there was a sign extension involved in the index
   8925 /// computation.
   8926 ///  (load (i64 add (i64 copyfromreg %c)
   8927 ///                 (i64 signextend (add (i8 load %index)
   8928 ///                                      (i8 1))))
   8929 /// vs
   8930 ///
   8931 /// (load (i64 add (i64 copyfromreg %c)
   8932 ///                (i64 signextend (i32 add (i32 signextend (i8 load %index))
   8933 ///                                         (i32 1)))))
   8934 struct BaseIndexOffset {
   8935   SDValue Base;
   8936   SDValue Index;
   8937   int64_t Offset;
   8938   bool IsIndexSignExt;
   8939 
   8940   BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
   8941 
   8942   BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
   8943                   bool IsIndexSignExt) :
   8944     Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
   8945 
   8946   bool equalBaseIndex(const BaseIndexOffset &Other) {
   8947     return Other.Base == Base && Other.Index == Index &&
   8948       Other.IsIndexSignExt == IsIndexSignExt;
   8949   }
   8950 
   8951   /// Parses tree in Ptr for base, index, offset addresses.
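  /// For example (illustrative):
  ///   (add %base, (add %index, 42))        --> {%base, %index, 42, false}
  ///   (add %base, (add (sext %index), 42)) --> {%base, %index, 42, true}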
   8952   static BaseIndexOffset match(SDValue Ptr) {
   8953     bool IsIndexSignExt = false;
   8954 
   8955     // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
   8956     // instruction, then it could be just the BASE or everything else we don't
   8957     // know how to handle. Just use Ptr as BASE and give up.
   8958     if (Ptr->getOpcode() != ISD::ADD)
   8959       return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
   8960 
   8961     // We know that we have at least an ADD instruction. Try to pattern match
   8962     // the simple case of BASE + OFFSET.
   8963     if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
   8964       int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
   8965       return  BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
   8966                               IsIndexSignExt);
   8967     }
   8968 
   8969     // Inside a loop the current BASE pointer is calculated using an ADD and a
   8970     // MUL instruction. In this case Ptr is the actual BASE pointer.
   8971     // (i64 add (i64 %array_ptr)
   8972     //          (i64 mul (i64 %induction_var)
   8973     //                   (i64 %element_size)))
   8974     if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
   8975       return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
   8976 
   8977     // Look at Base + Index + Offset cases.
   8978     SDValue Base = Ptr->getOperand(0);
   8979     SDValue IndexOffset = Ptr->getOperand(1);
   8980 
   8981     // Skip signextends.
   8982     if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
   8983       IndexOffset = IndexOffset->getOperand(0);
   8984       IsIndexSignExt = true;
   8985     }
   8986 
   8987     // Either the case of Base + Index (no offset) or something else.
   8988     if (IndexOffset->getOpcode() != ISD::ADD)
   8989       return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
   8990 
   8991     // Now we have the case of Base + Index + offset.
   8992     SDValue Index = IndexOffset->getOperand(0);
   8993     SDValue Offset = IndexOffset->getOperand(1);
   8994 
   8995     if (!isa<ConstantSDNode>(Offset))
   8996       return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
   8997 
   8998     // Ignore signextends.
   8999     if (Index->getOpcode() == ISD::SIGN_EXTEND) {
   9000       Index = Index->getOperand(0);
   9001       IsIndexSignExt = true;
   9002     } else IsIndexSignExt = false;
   9003 
   9004     int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
   9005     return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
   9006   }
   9007 };
   9008 
   9009 /// Holds a pointer to an LSBaseSDNode as well as information on where it
   9010 /// is located in a sequence of memory operations connected by a chain.
   9011 struct MemOpLink {
   9012   MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
   9013     MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
   9014   // Ptr to the mem node.
   9015   LSBaseSDNode *MemNode;
   9016   // Offset from the base ptr.
   9017   int64_t OffsetFromBase;
   9018   // What is the sequence number of this mem node.
   9019   // Lowest mem operand in the DAG starts at zero.
   9020   unsigned SequenceNum;
   9021 };
   9022 
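/// Merge consecutive stores of constants or loaded values into a single wide
/// store when the target supports a suitable wide type. For example
/// (illustrative), four consecutive i8 stores of constants at p, p+1, p+2 and
/// p+3 can be combined into a single i32 store at p when i32 is legal.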
   9023 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
   9024   EVT MemVT = St->getMemoryVT();
   9025   int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
   9026   bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes().
   9027     hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
   9028 
   9029   // Don't merge vectors into wider inputs.
   9030   if (MemVT.isVector() || !MemVT.isSimple())
   9031     return false;
   9032 
   9033   // Perform an early exit check. Do not bother looking at stored values that
   9034   // are not constants or loads.
   9035   SDValue StoredVal = St->getValue();
   9036   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
   9037   if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
   9038       !IsLoadSrc)
   9039     return false;
   9040 
   9041   // Only look at ends of store sequences.
   9042   SDValue Chain = SDValue(St, 1);
   9043   if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
   9044     return false;
   9045 
   9046   // This holds the base pointer, index, and the offset in bytes from the base
   9047   // pointer.
   9048   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
   9049 
   9050   // We must have a base and an offset.
   9051   if (!BasePtr.Base.getNode())
   9052     return false;
   9053 
   9054   // Do not handle stores to undef base pointers.
   9055   if (BasePtr.Base.getOpcode() == ISD::UNDEF)
   9056     return false;
   9057 
   9058   // Save the LoadSDNodes that we find in the chain.
   9059   // We need to make sure that these nodes do not interfere with
   9060   // any of the store nodes.
   9061   SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
   9062 
   9063   // Save the StoreSDNodes that we find in the chain.
   9064   SmallVector<MemOpLink, 8> StoreNodes;
   9065 
   9066   // Walk up the chain and look for nodes with offsets from the same
9067   // base pointer. Stop when reaching an instruction of a different kind or
9068   // one which has a different base pointer.
   9069   unsigned Seq = 0;
   9070   StoreSDNode *Index = St;
   9071   while (Index) {
   9072     // If the chain has more than one use, then we can't reorder the mem ops.
   9073     if (Index != St && !SDValue(Index, 1)->hasOneUse())
   9074       break;
   9075 
   9076     // Find the base pointer and offset for this memory node.
   9077     BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
   9078 
   9079     // Check that the base pointer is the same as the original one.
   9080     if (!Ptr.equalBaseIndex(BasePtr))
   9081       break;
   9082 
   9083     // Check that the alignment is the same.
   9084     if (Index->getAlignment() != St->getAlignment())
   9085       break;
   9086 
   9087     // The memory operands must not be volatile.
   9088     if (Index->isVolatile() || Index->isIndexed())
   9089       break;
   9090 
   9091     // No truncation.
   9092     if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index))
   9093       if (St->isTruncatingStore())
   9094         break;
   9095 
   9096     // The stored memory type must be the same.
   9097     if (Index->getMemoryVT() != MemVT)
   9098       break;
   9099 
   9100     // We do not allow unaligned stores because we want to prevent overriding
   9101     // stores.
   9102     if (Index->getAlignment()*8 != MemVT.getSizeInBits())
   9103       break;
   9104 
   9105     // We found a potential memory operand to merge.
   9106     StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
   9107 
   9108     // Find the next memory operand in the chain. If the next operand in the
   9109     // chain is a store then move up and continue the scan with the next
9110     // memory operand. If the next operand is a load, save it and use alias
   9111     // information to check if it interferes with anything.
   9112     SDNode *NextInChain = Index->getChain().getNode();
   9113     while (1) {
   9114       if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
   9115         // We found a store node. Use it for the next iteration.
   9116         Index = STn;
   9117         break;
   9118       } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
   9119         if (Ldn->isVolatile()) {
   9120           Index = nullptr;
   9121           break;
   9122         }
   9123 
   9124         // Save the load node for later. Continue the scan.
   9125         AliasLoadNodes.push_back(Ldn);
   9126         NextInChain = Ldn->getChain().getNode();
   9127         continue;
   9128       } else {
   9129         Index = nullptr;
   9130         break;
   9131       }
   9132     }
   9133   }
   9134 
   9135   // Check if there is anything to merge.
   9136   if (StoreNodes.size() < 2)
   9137     return false;
   9138 
   9139   // Sort the memory operands according to their distance from the base pointer.
   9140   std::sort(StoreNodes.begin(), StoreNodes.end(),
   9141             [](MemOpLink LHS, MemOpLink RHS) {
   9142     return LHS.OffsetFromBase < RHS.OffsetFromBase ||
   9143            (LHS.OffsetFromBase == RHS.OffsetFromBase &&
   9144             LHS.SequenceNum > RHS.SequenceNum);
   9145   });
   9146 
   9147   // Scan the memory operations on the chain and find the first non-consecutive
   9148   // store memory address.
   9149   unsigned LastConsecutiveStore = 0;
   9150   int64_t StartAddress = StoreNodes[0].OffsetFromBase;
   9151   for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
   9152 
   9153     // Check that the addresses are consecutive starting from the second
   9154     // element in the list of stores.
   9155     if (i > 0) {
   9156       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
   9157       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
   9158         break;
   9159     }
   9160 
   9161     bool Alias = false;
   9162     // Check if this store interferes with any of the loads that we found.
   9163     for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld)
   9164       if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) {
   9165         Alias = true;
   9166         break;
   9167       }
9168     // We found a load that aliases with this store. Stop the sequence.
   9169     if (Alias)
   9170       break;
   9171 
   9172     // Mark this node as useful.
   9173     LastConsecutiveStore = i;
   9174   }
   9175 
   9176   // The node with the lowest store address.
   9177   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
   9178 
   9179   // Store the constants into memory as one consecutive store.
   9180   if (!IsLoadSrc) {
   9181     unsigned LastLegalType = 0;
   9182     unsigned LastLegalVectorType = 0;
   9183     bool NonZero = false;
   9184     for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
   9185       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
   9186       SDValue StoredVal = St->getValue();
   9187 
   9188       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
   9189         NonZero |= !C->isNullValue();
   9190       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
   9191         NonZero |= !C->getConstantFPValue()->isNullValue();
   9192       } else {
   9193         // Non-constant.
   9194         break;
   9195       }
   9196 
   9197       // Find a legal type for the constant store.
   9198       unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
   9199       EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
   9200       if (TLI.isTypeLegal(StoreTy))
   9201         LastLegalType = i+1;
   9202       // Or check whether a truncstore is legal.
   9203       else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
   9204                TargetLowering::TypePromoteInteger) {
   9205         EVT LegalizedStoredValueTy =
   9206           TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
   9207         if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy))
   9208           LastLegalType = i+1;
   9209       }
   9210 
   9211       // Find a legal type for the vector store.
   9212       EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
   9213       if (TLI.isTypeLegal(Ty))
   9214         LastLegalVectorType = i + 1;
   9215     }
   9216 
   9217     // We only use vectors if the constant is known to be zero and the
   9218     // function is not marked with the noimplicitfloat attribute.
   9219     if (NonZero || NoVectors)
   9220       LastLegalVectorType = 0;
   9221 
   9222     // Check if we found a legal integer type to store.
   9223     if (LastLegalType == 0 && LastLegalVectorType == 0)
   9224       return false;
   9225 
   9226     bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
   9227     unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
   9228 
   9229     // Make sure we have something to merge.
   9230     if (NumElem < 2)
   9231       return false;
   9232 
   9233     unsigned EarliestNodeUsed = 0;
   9234     for (unsigned i=0; i < NumElem; ++i) {
   9235       // Find a chain for the new wide-store operand. Notice that some
   9236       // of the store nodes that we found may not be selected for inclusion
   9237       // in the wide store. The chain we use needs to be the chain of the
   9238       // earliest store node which is *used* and replaced by the wide store.
   9239       if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
   9240         EarliestNodeUsed = i;
   9241     }
   9242 
   9243     // The earliest Node in the DAG.
   9244     LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
   9245     SDLoc DL(StoreNodes[0].MemNode);
   9246 
   9247     SDValue StoredVal;
   9248     if (UseVector) {
   9249       // Find a legal type for the vector store.
   9250       EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
   9251       assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
   9252       StoredVal = DAG.getConstant(0, Ty);
   9253     } else {
   9254       unsigned StoreBW = NumElem * ElementSizeBytes * 8;
   9255       APInt StoreInt(StoreBW, 0);
   9256 
   9257       // Construct a single integer constant which is made of the smaller
   9258       // constant inputs.
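      // For example (little-endian, illustrative): merging the i8 constants
      // 0x11, 0x22, 0x33, 0x44 stored at increasing addresses yields the
      // single i32 constant 0x44332211.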
   9259       bool IsLE = TLI.isLittleEndian();
   9260       for (unsigned i = 0; i < NumElem ; ++i) {
9261         unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
9262         StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
9263         SDValue Val = St->getValue();
9264         StoreInt <<= ElementSizeBytes * 8;
9265         if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
9266           StoreInt |= C->getAPIntValue().zext(StoreBW);
9267         } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
9268           StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
   9269         } else {
   9270           assert(false && "Invalid constant element type");
   9271         }
   9272       }
   9273 
   9274       // Create the new Load and Store operations.
   9275       EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
   9276       StoredVal = DAG.getConstant(StoreInt, StoreTy);
   9277     }
   9278 
   9279     SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
   9280                                     FirstInChain->getBasePtr(),
   9281                                     FirstInChain->getPointerInfo(),
   9282                                     false, false,
   9283                                     FirstInChain->getAlignment());
   9284 
   9285     // Replace the first store with the new store
   9286     CombineTo(EarliestOp, NewStore);
   9287     // Erase all other stores.
   9288     for (unsigned i = 0; i < NumElem ; ++i) {
   9289       if (StoreNodes[i].MemNode == EarliestOp)
   9290         continue;
   9291       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
   9292       // ReplaceAllUsesWith will replace all uses that existed when it was
   9293       // called, but graph optimizations may cause new ones to appear. For
   9294       // example, the case in pr14333 looks like
   9295       //
   9296       //  St's chain -> St -> another store -> X
   9297       //
   9298       // And the only difference from St to the other store is the chain.
9299       // When we change its chain to be St's chain they become identical,
   9300       // get CSEed and the net result is that X is now a use of St.
   9301       // Since we know that St is redundant, just iterate.
   9302       while (!St->use_empty())
   9303         DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
   9304       removeFromWorkList(St);
   9305       DAG.DeleteNode(St);
   9306     }
   9307 
   9308     return true;
   9309   }
   9310 
   9311   // Below we handle the case of multiple consecutive stores that
   9312   // come from multiple consecutive loads. We merge them into a single
   9313   // wide load and a single wide store.
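  // For example (illustrative), four i8 loads at p..p+3, each feeding an i8
  // store at q..q+3, can be rewritten as a single i32 load at p feeding a
  // single i32 store at q when the wide type is legal.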
   9314 
   9315   // Look for load nodes which are used by the stored values.
   9316   SmallVector<MemOpLink, 8> LoadNodes;
   9317 
   9318   // Find acceptable loads. Loads need to have the same chain (token factor),
   9319   // must not be zext, volatile, indexed, and they must be consecutive.
   9320   BaseIndexOffset LdBasePtr;
   9321   for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
   9322     StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
   9323     LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
   9324     if (!Ld) break;
   9325 
   9326     // Loads must only have one use.
   9327     if (!Ld->hasNUsesOfValue(1, 0))
   9328       break;
   9329 
   9330     // Check that the alignment is the same as the stores.
   9331     if (Ld->getAlignment() != St->getAlignment())
   9332       break;
   9333 
   9334     // The memory operands must not be volatile.
   9335     if (Ld->isVolatile() || Ld->isIndexed())
   9336       break;
   9337 
   9338     // We do not accept ext loads.
   9339     if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
   9340       break;
   9341 
   9342     // The stored memory type must be the same.
   9343     if (Ld->getMemoryVT() != MemVT)
   9344       break;
   9345 
   9346     BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr());
   9347     // If this is not the first ptr that we check.
   9348     if (LdBasePtr.Base.getNode()) {
   9349       // The base ptr must be the same.
   9350       if (!LdPtr.equalBaseIndex(LdBasePtr))
   9351         break;
   9352     } else {
   9353       // Check that all other base pointers are the same as this one.
   9354       LdBasePtr = LdPtr;
   9355     }
   9356 
   9357     // We found a potential memory operand to merge.
   9358     LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
   9359   }
   9360 
   9361   if (LoadNodes.size() < 2)
   9362     return false;
   9363 
   9364   // Scan the memory operations on the chain and find the first non-consecutive
   9365   // load memory address. These variables hold the index in the store node
   9366   // array.
   9367   unsigned LastConsecutiveLoad = 0;
9368   // These variables refer to sizes, not indices into the array.
   9369   unsigned LastLegalVectorType = 0;
   9370   unsigned LastLegalIntegerType = 0;
   9371   StartAddress = LoadNodes[0].OffsetFromBase;
   9372   SDValue FirstChain = LoadNodes[0].MemNode->getChain();
   9373   for (unsigned i = 1; i < LoadNodes.size(); ++i) {
9374     // All loads must share the same chain.
   9375     if (LoadNodes[i].MemNode->getChain() != FirstChain)
   9376       break;
   9377 
   9378     int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
   9379     if (CurrAddress - StartAddress != (ElementSizeBytes * i))
   9380       break;
   9381     LastConsecutiveLoad = i;
   9382 
   9383     // Find a legal type for the vector store.
   9384     EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
   9385     if (TLI.isTypeLegal(StoreTy))
   9386       LastLegalVectorType = i + 1;
   9387 
   9388     // Find a legal type for the integer store.
   9389     unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
   9390     StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
   9391     if (TLI.isTypeLegal(StoreTy))
   9392       LastLegalIntegerType = i + 1;
9393     // Or check whether a truncstore and extload are legal.
   9394     else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
   9395              TargetLowering::TypePromoteInteger) {
   9396       EVT LegalizedStoredValueTy =
   9397         TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy);
   9398       if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
   9399           TLI.isLoadExtLegal(ISD::ZEXTLOAD, StoreTy) &&
   9400           TLI.isLoadExtLegal(ISD::SEXTLOAD, StoreTy) &&
   9401           TLI.isLoadExtLegal(ISD::EXTLOAD, StoreTy))
   9402         LastLegalIntegerType = i+1;
   9403     }
   9404   }
   9405 
   9406   // Only use vector types if the vector type is larger than the integer type.
   9407   // If they are the same, use integers.
   9408   bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
   9409   unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
   9410 
9411   // We add +1 here because the LastConsecutive* variables are indices of the
9412   // last element, while NumElem is a count of elements.
   9413   unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
   9414   NumElem = std::min(LastLegalType, NumElem);
   9415 
   9416   if (NumElem < 2)
   9417     return false;
   9418 
   9419   // The earliest Node in the DAG.
   9420   unsigned EarliestNodeUsed = 0;
   9421   LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
   9422   for (unsigned i=1; i<NumElem; ++i) {
   9423     // Find a chain for the new wide-store operand. Notice that some
   9424     // of the store nodes that we found may not be selected for inclusion
   9425     // in the wide store. The chain we use needs to be the chain of the
   9426     // earliest store node which is *used* and replaced by the wide store.
   9427     if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
   9428       EarliestNodeUsed = i;
   9429   }
   9430 
   9431   // Find if it is better to use vectors or integers to load and store
   9432   // to memory.
   9433   EVT JointMemOpVT;
   9434   if (UseVectorTy) {
   9435     JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
   9436   } else {
   9437     unsigned StoreBW = NumElem * ElementSizeBytes * 8;
   9438     JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
   9439   }
   9440 
   9441   SDLoc LoadDL(LoadNodes[0].MemNode);
   9442   SDLoc StoreDL(StoreNodes[0].MemNode);
   9443 
   9444   LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
   9445   SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL,
   9446                                 FirstLoad->getChain(),
   9447                                 FirstLoad->getBasePtr(),
   9448                                 FirstLoad->getPointerInfo(),
   9449                                 false, false, false,
   9450                                 FirstLoad->getAlignment());
   9451 
   9452   SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad,
   9453                                   FirstInChain->getBasePtr(),
   9454                                   FirstInChain->getPointerInfo(), false, false,
   9455                                   FirstInChain->getAlignment());
   9456 
   9457   // Replace one of the loads with the new load.
   9458   LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
   9459   DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
   9460                                 SDValue(NewLoad.getNode(), 1));
   9461 
   9462   // Remove the rest of the load chains.
   9463   for (unsigned i = 1; i < NumElem ; ++i) {
   9464     // Replace all chain users of the old load nodes with the chain of the new
   9465     // load node.
   9466     LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
   9467     DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
   9468   }
   9469 
   9470   // Replace the first store with the new store.
   9471   CombineTo(EarliestOp, NewStore);
   9472   // Erase all other stores.
   9473   for (unsigned i = 0; i < NumElem ; ++i) {
   9474     // Remove all Store nodes.
   9475     if (StoreNodes[i].MemNode == EarliestOp)
   9476       continue;
   9477     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
   9478     DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
   9479     removeFromWorkList(St);
   9480     DAG.DeleteNode(St);
   9481   }
   9482 
   9483   return true;
   9484 }
   9485 
   9486 SDValue DAGCombiner::visitSTORE(SDNode *N) {
   9487   StoreSDNode *ST  = cast<StoreSDNode>(N);
   9488   SDValue Chain = ST->getChain();
   9489   SDValue Value = ST->getValue();
   9490   SDValue Ptr   = ST->getBasePtr();
   9491 
   9492   // If this is a store of a bit convert, store the input value if the
   9493   // resultant store does not need a higher alignment than the original.
   9494   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
   9495       ST->isUnindexed()) {
   9496     unsigned OrigAlign = ST->getAlignment();
   9497     EVT SVT = Value.getOperand(0).getValueType();
   9498     unsigned Align = TLI.getDataLayout()->
   9499       getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
   9500     if (Align <= OrigAlign &&
   9501         ((!LegalOperations && !ST->isVolatile()) ||
   9502          TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
   9503       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
   9504                           Ptr, ST->getPointerInfo(), ST->isVolatile(),
   9505                           ST->isNonTemporal(), OrigAlign,
   9506                           ST->getTBAAInfo());
   9507   }
   9508 
   9509   // Turn 'store undef, Ptr' -> nothing.
   9510   if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
   9511     return Chain;
   9512 
9513   // Turn 'store float 1.0, Ptr' -> 'store int 0x3F800000, Ptr'
   9514   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
   9515     // NOTE: If the original store is volatile, this transform must not increase
   9516     // the number of stores.  For example, on x86-32 an f64 can be stored in one
   9517     // processor operation but an i64 (which is not legal) requires two.  So the
   9518     // transform should not be done in this case.
   9519     if (Value.getOpcode() != ISD::TargetConstantFP) {
   9520       SDValue Tmp;
   9521       switch (CFP->getSimpleValueType(0).SimpleTy) {
   9522       default: llvm_unreachable("Unknown FP type");
   9523       case MVT::f16:    // We don't do this for these yet.
   9524       case MVT::f80:
   9525       case MVT::f128:
   9526       case MVT::ppcf128:
   9527         break;
   9528       case MVT::f32:
   9529         if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
   9530             TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
   9531           Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
   9532                               bitcastToAPInt().getZExtValue(), MVT::i32);
   9533           return DAG.getStore(Chain, SDLoc(N), Tmp,
   9534                               Ptr, ST->getMemOperand());
   9535         }
   9536         break;
   9537       case MVT::f64:
   9538         if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
   9539              !ST->isVolatile()) ||
   9540             TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
   9541           Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
   9542                                 getZExtValue(), MVT::i64);
   9543           return DAG.getStore(Chain, SDLoc(N), Tmp,
   9544                               Ptr, ST->getMemOperand());
   9545         }
   9546 
   9547         if (!ST->isVolatile() &&
   9548             TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
   9549           // Many FP stores are not made apparent until after legalize, e.g. for
   9550           // argument passing.  Since this is so common, custom legalize the
   9551           // 64-bit integer store into two 32-bit stores.
   9552           uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
   9553           SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
   9554           SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
   9555           if (TLI.isBigEndian()) std::swap(Lo, Hi);
   9556 
   9557           unsigned Alignment = ST->getAlignment();
   9558           bool isVolatile = ST->isVolatile();
   9559           bool isNonTemporal = ST->isNonTemporal();
   9560           const MDNode *TBAAInfo = ST->getTBAAInfo();
   9561 
   9562           SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
   9563                                      Ptr, ST->getPointerInfo(),
   9564                                      isVolatile, isNonTemporal,
   9565                                      ST->getAlignment(), TBAAInfo);
   9566           Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr,
   9567                             DAG.getConstant(4, Ptr.getValueType()));
   9568           Alignment = MinAlign(Alignment, 4U);
   9569           SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
   9570                                      Ptr, ST->getPointerInfo().getWithOffset(4),
   9571                                      isVolatile, isNonTemporal,
   9572                                      Alignment, TBAAInfo);
   9573           return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
   9574                              St0, St1);
   9575         }
   9576 
   9577         break;
   9578       }
   9579     }
   9580   }
   9581 
   9582   // Try to infer better alignment information than the store already has.
   9583   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
   9584     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
   9585       if (Align > ST->getAlignment())
   9586         return DAG.getTruncStore(Chain, SDLoc(N), Value,
   9587                                  Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
   9588                                  ST->isVolatile(), ST->isNonTemporal(), Align,
   9589                                  ST->getTBAAInfo());
   9590     }
   9591   }
   9592 
9593   // Try transforming a pair of floating point load / store ops to integer
   9594   // load / store ops.
   9595   SDValue NewST = TransformFPLoadStorePair(N);
   9596   if (NewST.getNode())
   9597     return NewST;
   9598 
   9599   bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
   9600     TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
   9601 #ifndef NDEBUG
   9602   if (CombinerAAOnlyFunc.getNumOccurrences() &&
   9603       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
   9604     UseAA = false;
   9605 #endif
   9606   if (UseAA && ST->isUnindexed()) {
   9607     // Walk up chain skipping non-aliasing memory nodes.
   9608     SDValue BetterChain = FindBetterChain(N, Chain);
   9609 
   9610     // If there is a better chain.
   9611     if (Chain != BetterChain) {
   9612       SDValue ReplStore;
   9613 
   9614       // Replace the chain to avoid dependency.
   9615       if (ST->isTruncatingStore()) {
   9616         ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr,
   9617                                       ST->getMemoryVT(), ST->getMemOperand());
   9618       } else {
   9619         ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr,
   9620                                  ST->getMemOperand());
   9621       }
   9622 
   9623       // Create token to keep both nodes around.
   9624       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
   9625                                   MVT::Other, Chain, ReplStore);
   9626 
   9627       // Make sure the new and old chains are cleaned up.
   9628       AddToWorkList(Token.getNode());
   9629 
   9630       // Don't add users to work list.
   9631       return CombineTo(N, Token, false);
   9632     }
   9633   }
   9634 
   9635   // Try transforming N to an indexed store.
   9636   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
   9637     return SDValue(N, 0);
   9638 
   9639   // FIXME: is there such a thing as a truncating indexed store?
   9640   if (ST->isTruncatingStore() && ST->isUnindexed() &&
   9641       Value.getValueType().isInteger()) {
   9642     // See if we can simplify the input to this truncstore with knowledge that
   9643     // only the low bits are being used.  For example:
   9644     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
   9645     SDValue Shorter =
   9646       GetDemandedBits(Value,
   9647                       APInt::getLowBitsSet(
   9648                         Value.getValueType().getScalarType().getSizeInBits(),
   9649                         ST->getMemoryVT().getScalarType().getSizeInBits()));
   9650     AddToWorkList(Value.getNode());
   9651     if (Shorter.getNode())
   9652       return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
   9653                                Ptr, ST->getMemoryVT(), ST->getMemOperand());
   9654 
   9655     // Otherwise, see if we can simplify the operation with
   9656     // SimplifyDemandedBits, which only works if the value has a single use.
   9657     if (SimplifyDemandedBits(Value,
   9658                         APInt::getLowBitsSet(
   9659                           Value.getValueType().getScalarType().getSizeInBits(),
   9660                           ST->getMemoryVT().getScalarType().getSizeInBits())))
   9661       return SDValue(N, 0);
   9662   }
   9663 
   9664   // If this is a load followed by a store to the same location, then the store
   9665   // is dead/noop.
   9666   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
   9667     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
   9668         ST->isUnindexed() && !ST->isVolatile() &&
   9669         // There can't be any side effects between the load and store, such as
   9670         // a call or store.
   9671         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
   9672       // The store is dead, remove it.
   9673       return Chain;
   9674     }
   9675   }
   9676 
   9677   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
   9678   // truncating store.  We can do this even if this is already a truncstore.
   9679   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
   9680       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
   9681       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
   9682                             ST->getMemoryVT())) {
   9683     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
   9684                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
   9685   }
   9686 
   9687   // Only perform this optimization before the types are legal, because we
    9688   // don't want to repeat it on every DAGCombine invocation.
   9689   if (!LegalTypes) {
   9690     bool EverChanged = false;
   9691 
   9692     do {
   9693       // There can be multiple store sequences on the same chain.
   9694       // Keep trying to merge store sequences until we are unable to do so
   9695       // or until we merge the last store on the chain.
   9696       bool Changed = MergeConsecutiveStores(ST);
   9697       EverChanged |= Changed;
   9698       if (!Changed) break;
   9699     } while (ST->getOpcode() != ISD::DELETED_NODE);
   9700 
   9701     if (EverChanged)
   9702       return SDValue(N, 0);
   9703   }
   9704 
   9705   return ReduceLoadOpStoreWidth(N);
   9706 }
   9707 
   9708 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
   9709   SDValue InVec = N->getOperand(0);
   9710   SDValue InVal = N->getOperand(1);
   9711   SDValue EltNo = N->getOperand(2);
   9712   SDLoc dl(N);
   9713 
   9714   // If the inserted element is an UNDEF, just use the input vector.
   9715   if (InVal.getOpcode() == ISD::UNDEF)
   9716     return InVec;
   9717 
   9718   EVT VT = InVec.getValueType();
   9719 
   9720   // If we can't generate a legal BUILD_VECTOR, exit
   9721   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
   9722     return SDValue();
   9723 
   9724   // Check that we know which element is being inserted
   9725   if (!isa<ConstantSDNode>(EltNo))
   9726     return SDValue();
   9727   unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
   9728 
   9729   // Canonicalize insert_vector_elt dag nodes.
   9730   // Example:
   9731   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
   9732   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
   9733   //
   9734   // Do this only if the child insert_vector node has one use; also
   9735   // do this only if indices are both constants and Idx1 < Idx0.
   9736   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
   9737       && isa<ConstantSDNode>(InVec.getOperand(2))) {
   9738     unsigned OtherElt =
   9739       cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
   9740     if (Elt < OtherElt) {
   9741       // Swap nodes.
   9742       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
   9743                                   InVec.getOperand(0), InVal, EltNo);
   9744       AddToWorkList(NewOp.getNode());
   9745       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
   9746                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
   9747     }
   9748   }
   9749 
   9750   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
   9751   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
   9752   // vector elements.
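           // For example (illustrative):
           //   (insert_vector_elt (build_vector A, B, C, D), V, 2)
           //     -> (build_vector A, B, V, D)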
   9753   SmallVector<SDValue, 8> Ops;
   9754   // Do not combine these two vectors if the output vector will not replace
   9755   // the input vector.
   9756   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
   9757     Ops.append(InVec.getNode()->op_begin(),
   9758                InVec.getNode()->op_end());
   9759   } else if (InVec.getOpcode() == ISD::UNDEF) {
   9760     unsigned NElts = VT.getVectorNumElements();
   9761     Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
   9762   } else {
   9763     return SDValue();
   9764   }
   9765 
   9766   // Insert the element
   9767   if (Elt < Ops.size()) {
   9768     // All the operands of BUILD_VECTOR must have the same type;
   9769     // we enforce that here.
   9770     EVT OpVT = Ops[0].getValueType();
   9771     if (InVal.getValueType() != OpVT)
   9772       InVal = OpVT.bitsGT(InVal.getValueType()) ?
   9773                 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
   9774                 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
   9775     Ops[Elt] = InVal;
   9776   }
   9777 
   9778   // Return the new vector
   9779   return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
   9780 }
   9781 
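         /// Replace an extract_vector_elt of a load with a narrow scalar load of just
         /// the requested element, when the alignment and legality checks allow it.
         /// For example (illustrative, little-endian):
         ///   (i32 extract_vector_elt (v4i32 load p), 2) -> (i32 load p+8)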
   9782 SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
   9783     SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
   9784   EVT ResultVT = EVE->getValueType(0);
   9785   EVT VecEltVT = InVecVT.getVectorElementType();
   9786   unsigned Align = OriginalLoad->getAlignment();
   9787   unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
   9788       VecEltVT.getTypeForEVT(*DAG.getContext()));
   9789 
   9790   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
   9791     return SDValue();
   9792 
   9793   Align = NewAlign;
   9794 
   9795   SDValue NewPtr = OriginalLoad->getBasePtr();
   9796   SDValue Offset;
   9797   EVT PtrType = NewPtr.getValueType();
   9798   MachinePointerInfo MPI;
   9799   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
   9800     int Elt = ConstEltNo->getZExtValue();
   9801     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
   9802     if (TLI.isBigEndian())
   9803       PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
   9804     Offset = DAG.getConstant(PtrOff, PtrType);
   9805     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
   9806   } else {
   9807     Offset = DAG.getNode(
   9808         ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
   9809         DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
   9810     if (TLI.isBigEndian())
   9811       Offset = DAG.getNode(
   9812           ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
   9813           DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset);
   9814     MPI = OriginalLoad->getPointerInfo();
   9815   }
   9816   NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);
   9817 
   9818   // The replacement we need to do here is a little tricky: we need to
   9819   // replace an extractelement of a load with a load.
   9820   // Use ReplaceAllUsesOfValuesWith to do the replacement.
    9821   // Note that this replacement assumes that the extractelement is the only
   9822   // use of the load; that's okay because we don't want to perform this
   9823   // transformation in other cases anyway.
   9824   SDValue Load;
   9825   SDValue Chain;
   9826   if (ResultVT.bitsGT(VecEltVT)) {
   9827     // If the result type of vextract is wider than the load, then issue an
   9828     // extending load instead.
   9829     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT)
   9830                                    ? ISD::ZEXTLOAD
   9831                                    : ISD::EXTLOAD;
   9832     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(),
   9833                           NewPtr, MPI, VecEltVT, OriginalLoad->isVolatile(),
   9834                           OriginalLoad->isNonTemporal(), Align,
   9835                           OriginalLoad->getTBAAInfo());
   9836     Chain = Load.getValue(1);
   9837   } else {
   9838     Load = DAG.getLoad(
   9839         VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
   9840         OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
   9841         OriginalLoad->isInvariant(), Align, OriginalLoad->getTBAAInfo());
   9842     Chain = Load.getValue(1);
   9843     if (ResultVT.bitsLT(VecEltVT))
   9844       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
   9845     else
   9846       Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
   9847   }
   9848   WorkListRemover DeadNodes(*this);
   9849   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
   9850   SDValue To[] = { Load, Chain };
   9851   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
   9852   // Since we're explicitly calling ReplaceAllUses, add the new node to the
   9853   // worklist explicitly as well.
   9854   AddToWorkList(Load.getNode());
   9855   AddUsersToWorkList(Load.getNode()); // Add users too
   9856   // Make sure to revisit this node to clean it up; it will usually be dead.
   9857   AddToWorkList(EVE);
   9858   ++OpsNarrowed;
   9859   return SDValue(EVE, 0);
   9860 }
   9861 
   9862 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
    9863   // (vextract (scalar_to_vector val), 0) -> val
   9864   SDValue InVec = N->getOperand(0);
   9865   EVT VT = InVec.getValueType();
   9866   EVT NVT = N->getValueType(0);
   9867 
   9868   if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
   9869     // Check if the result type doesn't match the inserted element type. A
   9870     // SCALAR_TO_VECTOR may truncate the inserted element and the
    9871     // EXTRACT_VECTOR_ELT may widen the extracted element.
   9872     SDValue InOp = InVec.getOperand(0);
   9873     if (InOp.getValueType() != NVT) {
   9874       assert(InOp.getValueType().isInteger() && NVT.isInteger());
   9875       return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
   9876     }
   9877     return InOp;
   9878   }
   9879 
   9880   SDValue EltNo = N->getOperand(1);
   9881   bool ConstEltNo = isa<ConstantSDNode>(EltNo);
   9882 
   9883   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
   9884   // We only perform this optimization before the op legalization phase because
   9885   // we may introduce new vector instructions which are not backed by TD
    9886   // patterns. For example, on AVX we cannot extract an element from a wide
    9887   // vector without first using extract_subvector. However, if we can find an
    9888   // underlying scalar value, then we can always use that.
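           // For example (illustrative):
           //   (extract_vector_elt (vector_shuffle X, Y, <3,u,u,u>), 0)
           //     -> (extract_vector_elt X, 3)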
   9889   if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
   9890       && ConstEltNo) {
   9891     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
   9892     int NumElem = VT.getVectorNumElements();
   9893     ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
   9894     // Find the new index to extract from.
   9895     int OrigElt = SVOp->getMaskElt(Elt);
   9896 
   9897     // Extracting an undef index is undef.
   9898     if (OrigElt == -1)
   9899       return DAG.getUNDEF(NVT);
   9900 
   9901     // Select the right vector half to extract from.
   9902     SDValue SVInVec;
   9903     if (OrigElt < NumElem) {
   9904       SVInVec = InVec->getOperand(0);
   9905     } else {
   9906       SVInVec = InVec->getOperand(1);
   9907       OrigElt -= NumElem;
   9908     }
   9909 
   9910     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
   9911       SDValue InOp = SVInVec.getOperand(OrigElt);
   9912       if (InOp.getValueType() != NVT) {
   9913         assert(InOp.getValueType().isInteger() && NVT.isInteger());
   9914         InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
   9915       }
   9916 
   9917       return InOp;
   9918     }
   9919 
   9920     // FIXME: We should handle recursing on other vector shuffles and
   9921     // scalar_to_vector here as well.
   9922 
   9923     if (!LegalOperations) {
   9924       EVT IndexTy = TLI.getVectorIdxTy();
   9925       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
   9926                          SVInVec, DAG.getConstant(OrigElt, IndexTy));
   9927     }
   9928   }
   9929 
   9930   bool BCNumEltsChanged = false;
   9931   EVT ExtVT = VT.getVectorElementType();
   9932   EVT LVT = ExtVT;
   9933 
    9934   // If the result of the load has to be truncated, then it's not necessarily
   9935   // profitable.
   9936   if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
   9937     return SDValue();
   9938 
   9939   if (InVec.getOpcode() == ISD::BITCAST) {
   9940     // Don't duplicate a load with other uses.
   9941     if (!InVec.hasOneUse())
   9942       return SDValue();
   9943 
   9944     EVT BCVT = InVec.getOperand(0).getValueType();
   9945     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
   9946       return SDValue();
   9947     if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
   9948       BCNumEltsChanged = true;
   9949     InVec = InVec.getOperand(0);
   9950     ExtVT = BCVT.getVectorElementType();
   9951   }
   9952 
   9953   // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
   9954   if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
   9955       ISD::isNormalLoad(InVec.getNode())) {
   9956     SDValue Index = N->getOperand(1);
   9957     if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
   9958       return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
   9959                                                            OrigLoad);
   9960   }
   9961 
   9962   // Perform only after legalization to ensure build_vector / vector_shuffle
   9963   // optimizations have already been done.
   9964   if (!LegalOperations) return SDValue();
   9965 
   9966   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
   9967   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
   9968   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
   9969 
   9970   if (ConstEltNo) {
   9971     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
   9972 
   9973     LoadSDNode *LN0 = nullptr;
   9974     const ShuffleVectorSDNode *SVN = nullptr;
   9975     if (ISD::isNormalLoad(InVec.getNode())) {
   9976       LN0 = cast<LoadSDNode>(InVec);
   9977     } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
   9978                InVec.getOperand(0).getValueType() == ExtVT &&
   9979                ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
   9980       // Don't duplicate a load with other uses.
   9981       if (!InVec.hasOneUse())
   9982         return SDValue();
   9983 
   9984       LN0 = cast<LoadSDNode>(InVec.getOperand(0));
   9985     } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
   9986       // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
   9987       // =>
   9988       // (load $addr+1*size)
   9989 
   9990       // Don't duplicate a load with other uses.
   9991       if (!InVec.hasOneUse())
   9992         return SDValue();
   9993 
   9994       // If the bit convert changed the number of elements, it is unsafe
   9995       // to examine the mask.
   9996       if (BCNumEltsChanged)
   9997         return SDValue();
   9998 
    9999       // Select the input vector, guarding against an out-of-range extract index.
   10000       unsigned NumElems = VT.getVectorNumElements();
   10001       int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
   10002       InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
   10003 
   10004       if (InVec.getOpcode() == ISD::BITCAST) {
   10005         // Don't duplicate a load with other uses.
   10006         if (!InVec.hasOneUse())
   10007           return SDValue();
   10008 
   10009         InVec = InVec.getOperand(0);
   10010       }
   10011       if (ISD::isNormalLoad(InVec.getNode())) {
   10012         LN0 = cast<LoadSDNode>(InVec);
   10013         Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
   10014         EltNo = DAG.getConstant(Elt, EltNo.getValueType());
   10015       }
   10016     }
   10017 
   10018     // Make sure we found a non-volatile load and the extractelement is
   10019     // the only use.
   10020     if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
   10021       return SDValue();
   10022 
   10023     // If Idx was -1 above, Elt is going to be -1, so just return undef.
   10024     if (Elt == -1)
   10025       return DAG.getUNDEF(LVT);
   10026 
   10027     return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
   10028   }
   10029 
   10030   return SDValue();
   10031 }
   10032 
   10033 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
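          // For example, on a little-endian target (illustrative):
          //   (v2i64 build_vector (zext i32 a), (zext i32 b))
          //     -> (v2i64 bitcast (v4i32 build_vector a, 0, b, 0))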
   10034 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
   10035   // We perform this optimization post type-legalization because
   10036   // the type-legalizer often scalarizes integer-promoted vectors.
   10037   // Performing this optimization before may create bit-casts which
   10038   // will be type-legalized to complex code sequences.
   10039   // We perform this optimization only before the operation legalizer because we
   10040   // may introduce illegal operations.
   10041   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
   10042     return SDValue();
   10043 
   10044   unsigned NumInScalars = N->getNumOperands();
   10045   SDLoc dl(N);
   10046   EVT VT = N->getValueType(0);
   10047 
   10048   // Check to see if this is a BUILD_VECTOR of a bunch of values
   10049   // which come from any_extend or zero_extend nodes. If so, we can create
   10050   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
   10051   // optimizations. We do not handle sign-extend because we can't fill the sign
   10052   // using shuffles.
   10053   EVT SourceType = MVT::Other;
   10054   bool AllAnyExt = true;
   10055 
   10056   for (unsigned i = 0; i != NumInScalars; ++i) {
   10057     SDValue In = N->getOperand(i);
   10058     // Ignore undef inputs.
   10059     if (In.getOpcode() == ISD::UNDEF) continue;
   10060 
   10061     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
   10062     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
   10063 
   10064     // Abort if the element is not an extension.
   10065     if (!ZeroExt && !AnyExt) {
   10066       SourceType = MVT::Other;
   10067       break;
   10068     }
   10069 
   10070     // The input is a ZeroExt or AnyExt. Check the original type.
   10071     EVT InTy = In.getOperand(0).getValueType();
   10072 
   10073     // Check that all of the widened source types are the same.
   10074     if (SourceType == MVT::Other)
   10075       // First time.
   10076       SourceType = InTy;
   10077     else if (InTy != SourceType) {
    10078       // Multiple incoming types. Abort.
   10079       SourceType = MVT::Other;
   10080       break;
   10081     }
   10082 
   10083     // Check if all of the extends are ANY_EXTENDs.
   10084     AllAnyExt &= AnyExt;
   10085   }
   10086 
   10087   // In order to have valid types, all of the inputs must be extended from the
   10088   // same source type and all of the inputs must be any or zero extend.
   10089   // Scalar sizes must be a power of two.
   10090   EVT OutScalarTy = VT.getScalarType();
   10091   bool ValidTypes = SourceType != MVT::Other &&
   10092                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
   10093                  isPowerOf2_32(SourceType.getSizeInBits());
   10094 
   10095   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
   10096   // turn into a single shuffle instruction.
   10097   if (!ValidTypes)
   10098     return SDValue();
   10099 
   10100   bool isLE = TLI.isLittleEndian();
   10101   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
   10102   assert(ElemRatio > 1 && "Invalid element size ratio");
   10103   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
   10104                                DAG.getConstant(0, SourceType);
   10105 
   10106   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
   10107   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
   10108 
   10109   // Populate the new build_vector
   10110   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
   10111     SDValue Cast = N->getOperand(i);
   10112     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
   10113             Cast.getOpcode() == ISD::ZERO_EXTEND ||
   10114             Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
   10115     SDValue In;
   10116     if (Cast.getOpcode() == ISD::UNDEF)
   10117       In = DAG.getUNDEF(SourceType);
   10118     else
   10119       In = Cast->getOperand(0);
   10120     unsigned Index = isLE ? (i * ElemRatio) :
   10121                             (i * ElemRatio + (ElemRatio - 1));
   10122 
   10123     assert(Index < Ops.size() && "Invalid index");
   10124     Ops[Index] = In;
   10125   }
   10126 
   10127   // The type of the new BUILD_VECTOR node.
   10128   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
   10129   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
   10130          "Invalid vector size");
   10131   // Check if the new vector type is legal.
   10132   if (!isTypeLegal(VecVT)) return SDValue();
   10133 
   10134   // Make the new BUILD_VECTOR.
   10135   SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
   10136 
   10137   // The new BUILD_VECTOR node has the potential to be further optimized.
   10138   AddToWorkList(BV.getNode());
   10139   // Bitcast to the desired type.
   10140   return DAG.getNode(ISD::BITCAST, dl, VT, BV);
   10141 }
   10142 
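          // Simplify (build_vec (sint_to_fp x), (sint_to_fp y), ..) to
          // (sint_to_fp (build_vec x, y, ..)) when at least two elements are defined
          // and the vectorized conversion is legal or custom; likewise for uint_to_fp.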
   10143 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
   10144   EVT VT = N->getValueType(0);
   10145 
   10146   unsigned NumInScalars = N->getNumOperands();
   10147   SDLoc dl(N);
   10148 
   10149   EVT SrcVT = MVT::Other;
   10150   unsigned Opcode = ISD::DELETED_NODE;
   10151   unsigned NumDefs = 0;
   10152 
   10153   for (unsigned i = 0; i != NumInScalars; ++i) {
   10154     SDValue In = N->getOperand(i);
   10155     unsigned Opc = In.getOpcode();
   10156 
   10157     if (Opc == ISD::UNDEF)
   10158       continue;
   10159 
    10160     // If all scalar values are floats converted from integers.
   10161     if (Opcode == ISD::DELETED_NODE &&
   10162         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
   10163       Opcode = Opc;
   10164     }
   10165 
   10166     if (Opc != Opcode)
   10167       return SDValue();
   10168 
   10169     EVT InVT = In.getOperand(0).getValueType();
   10170 
    10171     // If the scalar values do not all have the same type, bail out. This is
    10172     // done to keep the handling of integer BUILD_VECTORs simple.
   10173     if (SrcVT == MVT::Other)
   10174       SrcVT = InVT;
   10175     if (SrcVT != InVT)
   10176       return SDValue();
   10177     NumDefs++;
   10178   }
   10179 
    10180   // If the vector has just one element defined, it's not worth folding it into
   10181   // a vectorized one.
   10182   if (NumDefs < 2)
   10183     return SDValue();
   10184 
   10185   assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
   10186          && "Should only handle conversion from integer to float.");
   10187   assert(SrcVT != MVT::Other && "Cannot determine source type!");
   10188 
   10189   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
   10190 
   10191   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
   10192     return SDValue();
   10193 
   10194   SmallVector<SDValue, 8> Opnds;
   10195   for (unsigned i = 0; i != NumInScalars; ++i) {
   10196     SDValue In = N->getOperand(i);
   10197 
   10198     if (In.getOpcode() == ISD::UNDEF)
   10199       Opnds.push_back(DAG.getUNDEF(SrcVT));
   10200     else
   10201       Opnds.push_back(In.getOperand(0));
   10202   }
   10203   SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds);
   10204   AddToWorkList(BV.getNode());
   10205 
   10206   return DAG.getNode(Opcode, dl, VT, BV);
   10207 }
   10208 
   10209 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
   10210   unsigned NumInScalars = N->getNumOperands();
   10211   SDLoc dl(N);
   10212   EVT VT = N->getValueType(0);
   10213 
   10214   // A vector built entirely of undefs is undef.
   10215   if (ISD::allOperandsUndef(N))
   10216     return DAG.getUNDEF(VT);
   10217 
   10218   SDValue V = reduceBuildVecExtToExtBuildVec(N);
   10219   if (V.getNode())
   10220     return V;
   10221 
   10222   V = reduceBuildVecConvertToConvertBuildVec(N);
   10223   if (V.getNode())
   10224     return V;
   10225 
   10226   // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
   10227   // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
   10228   // at most two distinct vectors, turn this into a shuffle node.
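            // For example (illustrative), with V1 and V2 of the result type:
            //   (build_vector (vextract V1, 0), (vextract V1, 1),
            //                 (vextract V2, 0), (vextract V2, 1))
            //     -> (vector_shuffle V1, V2, <0, 1, 4, 5>)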
   10229 
   10230   // May only combine to shuffle after legalize if shuffle is legal.
   10231   if (LegalOperations &&
   10232       !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))
   10233     return SDValue();
   10234 
   10235   SDValue VecIn1, VecIn2;
   10236   for (unsigned i = 0; i != NumInScalars; ++i) {
   10237     // Ignore undef inputs.
   10238     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
   10239 
    10240     // If this input is something other than an EXTRACT_VECTOR_ELT with a
   10241     // constant index, bail out.
   10242     if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
   10243         !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
   10244       VecIn1 = VecIn2 = SDValue(nullptr, 0);
   10245       break;
   10246     }
   10247 
   10248     // We allow up to two distinct input vectors.
   10249     SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
   10250     if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
   10251       continue;
   10252 
   10253     if (!VecIn1.getNode()) {
   10254       VecIn1 = ExtractedFromVec;
   10255     } else if (!VecIn2.getNode()) {
   10256       VecIn2 = ExtractedFromVec;
   10257     } else {
   10258       // Too many inputs.
   10259       VecIn1 = VecIn2 = SDValue(nullptr, 0);
   10260       break;
   10261     }
   10262   }
   10263 
   10264   // If everything is good, we can make a shuffle operation.
   10265   if (VecIn1.getNode()) {
   10266     SmallVector<int, 8> Mask;
   10267     for (unsigned i = 0; i != NumInScalars; ++i) {
   10268       if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
   10269         Mask.push_back(-1);
   10270         continue;
   10271       }
   10272 
   10273       // If extracting from the first vector, just use the index directly.
   10274       SDValue Extract = N->getOperand(i);
   10275       SDValue ExtVal = Extract.getOperand(1);
   10276       if (Extract.getOperand(0) == VecIn1) {
   10277         unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
   10278         if (ExtIndex > VT.getVectorNumElements())
   10279           return SDValue();
   10280 
   10281         Mask.push_back(ExtIndex);
   10282         continue;
   10283       }
   10284 
   10285       // Otherwise, use InIdx + VecSize
   10286       unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
   10287       Mask.push_back(Idx+NumInScalars);
   10288     }
   10289 
   10290     // We can't generate a shuffle node with mismatched input and output types.
   10291     // Attempt to transform a single input vector to the correct type.
   10292     if ((VT != VecIn1.getValueType())) {
    10293       // We don't support shuffling between two values of different types.
   10294       if (VecIn2.getNode())
   10295         return SDValue();
   10296 
   10297       // We only support widening of vectors which are half the size of the
   10298       // output registers. For example XMM->YMM widening on X86 with AVX.
   10299       if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
   10300         return SDValue();
   10301 
    10302       // If the input vector type has a different base type than the output
   10303       // vector type, bail out.
   10304       if (VecIn1.getValueType().getVectorElementType() !=
   10305           VT.getVectorElementType())
   10306         return SDValue();
   10307 
   10308       // Widen the input vector by adding undef values.
   10309       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
   10310                            VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
   10311     }
   10312 
   10313     // If VecIn2 is unused then change it to undef.
   10314     VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
   10315 
   10316     // Check that we were able to transform all incoming values to the same
   10317     // type.
   10318     if (VecIn2.getValueType() != VecIn1.getValueType() ||
   10319         VecIn1.getValueType() != VT)
   10320           return SDValue();
   10321 
   10322     // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
   10323     if (!isTypeLegal(VT))
   10324       return SDValue();
   10325 
   10326     // Return the new VECTOR_SHUFFLE node.
   10327     SDValue Ops[2];
   10328     Ops[0] = VecIn1;
   10329     Ops[1] = VecIn2;
   10330     return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]);
   10331   }
   10332 
   10333   return SDValue();
   10334 }
   10335 
   10336 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
   10337   // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
   10338   // EXTRACT_SUBVECTOR operations.  If so, and if the EXTRACT_SUBVECTOR vector
   10339   // inputs come from at most two distinct vectors, turn this into a shuffle
   10340   // node.
   10341 
   10342   // If we only have one input vector, we don't need to do any concatenation.
   10343   if (N->getNumOperands() == 1)
   10344     return N->getOperand(0);
   10345 
   10346   // Check if all of the operands are undefs.
   10347   EVT VT = N->getValueType(0);
   10348   if (ISD::allOperandsUndef(N))
   10349     return DAG.getUNDEF(VT);
   10350 
   10351   // Optimize concat_vectors where one of the vectors is undef.
   10352   if (N->getNumOperands() == 2 &&
   10353       N->getOperand(1)->getOpcode() == ISD::UNDEF) {
   10354     SDValue In = N->getOperand(0);
   10355     assert(In.getValueType().isVector() && "Must concat vectors");
   10356 
    10357     // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(scalar).
   10358     if (In->getOpcode() == ISD::BITCAST &&
   10359         !In->getOperand(0)->getValueType(0).isVector()) {
   10360       SDValue Scalar = In->getOperand(0);
   10361       EVT SclTy = Scalar->getValueType(0);
   10362 
   10363       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
   10364         return SDValue();
   10365 
   10366       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy,
   10367                                  VT.getSizeInBits() / SclTy.getSizeInBits());
   10368       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
   10369         return SDValue();
   10370 
   10371       SDLoc dl = SDLoc(N);
   10372       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar);
   10373       return DAG.getNode(ISD::BITCAST, dl, VT, Res);
   10374     }
   10375   }
   10376 
   10377   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
   10378   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
   10379   if (N->getNumOperands() == 2 &&
   10380       N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
   10381       N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) {
   10382     EVT VT = N->getValueType(0);
   10383     SDValue N0 = N->getOperand(0);
   10384     SDValue N1 = N->getOperand(1);
   10385     SmallVector<SDValue, 8> Opnds;
   10386     unsigned BuildVecNumElts =  N0.getNumOperands();
   10387 
   10388     EVT SclTy0 = N0.getOperand(0)->getValueType(0);
   10389     EVT SclTy1 = N1.getOperand(0)->getValueType(0);
   10390     if (SclTy0.isFloatingPoint()) {
   10391       for (unsigned i = 0; i != BuildVecNumElts; ++i)
   10392         Opnds.push_back(N0.getOperand(i));
   10393       for (unsigned i = 0; i != BuildVecNumElts; ++i)
   10394         Opnds.push_back(N1.getOperand(i));
   10395     } else {
    10396       // If the BUILD_VECTORs are built from integers, they may have different
   10397       // operand types. Get the smaller type and truncate all operands to it.
   10398       EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1;
   10399       for (unsigned i = 0; i != BuildVecNumElts; ++i)
   10400         Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
   10401                         N0.getOperand(i)));
   10402       for (unsigned i = 0; i != BuildVecNumElts; ++i)
   10403         Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
   10404                         N1.getOperand(i)));
   10405     }
   10406 
   10407     return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
   10408   }
   10409 
    10410   // Type legalization of vectors and DAG canonicalization of VECTOR_SHUFFLE
    10411   // nodes often generate nop CONCAT_VECTORS nodes.
    10412   // Scan the CONCAT_VECTORS operands and look for a CONCAT operation that
    10413   // places the incoming vectors at the exact same location.
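            // For example (illustrative), with X of the result type and 4-element parts:
            //   (concat_vectors (extract_subvector X, 0), (extract_subvector X, 4))
            //     -> X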
   10414   SDValue SingleSource = SDValue();
   10415   unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
   10416 
   10417   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
   10418     SDValue Op = N->getOperand(i);
   10419 
   10420     if (Op.getOpcode() == ISD::UNDEF)
   10421       continue;
   10422 
   10423     // Check if this is the identity extract:
   10424     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
   10425       return SDValue();
   10426 
   10427     // Find the single incoming vector for the extract_subvector.
   10428     if (SingleSource.getNode()) {
   10429       if (Op.getOperand(0) != SingleSource)
   10430         return SDValue();
   10431     } else {
   10432       SingleSource = Op.getOperand(0);
   10433 
    10434       // Check that the source type is the same as the type of the result.
    10435       // If not, this concat may extend the vector, so we cannot
    10436       // optimize it away.
   10437       if (SingleSource.getValueType() != N->getValueType(0))
   10438         return SDValue();
   10439     }
   10440 
   10441     unsigned IdentityIndex = i * PartNumElem;
   10442     ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
   10443     // The extract index must be constant.
   10444     if (!CS)
   10445       return SDValue();
   10446 
   10447     // Check that we are reading from the identity index.
   10448     if (CS->getZExtValue() != IdentityIndex)
   10449       return SDValue();
   10450   }
   10451 
   10452   if (SingleSource.getNode())
   10453     return SingleSource;
   10454 
   10455   return SDValue();
   10456 }
   10457 
   10458 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
   10459   EVT NVT = N->getValueType(0);
   10460   SDValue V = N->getOperand(0);
   10461 
   10462   if (V->getOpcode() == ISD::CONCAT_VECTORS) {
   10463     // Combine:
   10464     //    (extract_subvec (concat V1, V2, ...), i)
   10465     // Into:
   10466     //    Vi if possible
   10467     // Only operand 0 is checked as 'concat' assumes all inputs of the same
   10468     // type.
   10469     if (V->getOperand(0).getValueType() != NVT)
   10470       return SDValue();
    10471     unsigned Idx = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
   10472     unsigned NumElems = NVT.getVectorNumElements();
   10473     assert((Idx % NumElems) == 0 &&
   10474            "IDX in concat is not a multiple of the result vector length.");
   10475     return V->getOperand(Idx / NumElems);
   10476   }
   10477 
   10478   // Skip bitcasting
   10479   if (V->getOpcode() == ISD::BITCAST)
   10480     V = V.getOperand(0);
   10481 
   10482   if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
   10483     SDLoc dl(N);
    10484     // Handle only the simple case where the inserted and extracted vectors
    10485     // have the same type and are half the size of the larger vector.
   10486     EVT BigVT = V->getOperand(0).getValueType();
   10487     EVT SmallVT = V->getOperand(1).getValueType();
   10488     if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
   10489       return SDValue();
   10490 
   10491     // Only handle cases where both indexes are constants with the same type.
   10492     ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
   10493     ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
   10494 
   10495     if (InsIdx && ExtIdx &&
   10496         InsIdx->getValueType(0).getSizeInBits() <= 64 &&
   10497         ExtIdx->getValueType(0).getSizeInBits() <= 64) {
   10498       // Combine:
   10499       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
   10500       // Into:
   10501       //    indices are equal or bit offsets are equal => V1
   10502       //    otherwise => (extract_subvec V1, ExtIdx)
   10503       if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
   10504           ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
   10505         return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1));
   10506       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT,
   10507                          DAG.getNode(ISD::BITCAST, dl,
   10508                                      N->getOperand(0).getValueType(),
   10509                                      V->getOperand(0)), N->getOperand(1));
   10510     }
   10511   }
   10512 
   10513   return SDValue();
   10514 }
   10515 
   10516 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat.
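          // For example (illustrative), where A, B, C and D each have two elements:
          //   (vector_shuffle (concat_vectors A, B), (concat_vectors C, D), <4,5,0,1>)
          //     -> (concat_vectors C, A)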
   10517 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
   10518   EVT VT = N->getValueType(0);
   10519   unsigned NumElts = VT.getVectorNumElements();
   10520 
   10521   SDValue N0 = N->getOperand(0);
   10522   SDValue N1 = N->getOperand(1);
   10523   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
   10524 
   10525   SmallVector<SDValue, 4> Ops;
   10526   EVT ConcatVT = N0.getOperand(0).getValueType();
   10527   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
   10528   unsigned NumConcats = NumElts / NumElemsPerConcat;
   10529 
   10530   // Look at every vector that's inserted. We're looking for exact
    10531   // subvector-sized copies from a concatenated vector.
   10532   for (unsigned I = 0; I != NumConcats; ++I) {
   10533     // Make sure we're dealing with a copy.
   10534     unsigned Begin = I * NumElemsPerConcat;
   10535     bool AllUndef = true, NoUndef = true;
   10536     for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
   10537       if (SVN->getMaskElt(J) >= 0)
   10538         AllUndef = false;
   10539       else
   10540         NoUndef = false;
   10541     }
   10542 
   10543     if (NoUndef) {
   10544       if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
   10545         return SDValue();
   10546 
   10547       for (unsigned J = 1; J != NumElemsPerConcat; ++J)
   10548         if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
   10549           return SDValue();
   10550 
   10551       unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
   10552       if (FirstElt < N0.getNumOperands())
   10553         Ops.push_back(N0.getOperand(FirstElt));
   10554       else
   10555         Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
   10556 
   10557     } else if (AllUndef) {
   10558       Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
    10559     } else { // Mix of defined and undef elements; can't do the optimization.
   10560       return SDValue();
   10561     }
   10562   }
   10563 
   10564   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
   10565 }
   10566 
   10567 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   10568   EVT VT = N->getValueType(0);
   10569   unsigned NumElts = VT.getVectorNumElements();
   10570 
   10571   SDValue N0 = N->getOperand(0);
   10572   SDValue N1 = N->getOperand(1);
   10573 
   10574   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
   10575 
   10576   // Canonicalize shuffle undef, undef -> undef
   10577   if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
   10578     return DAG.getUNDEF(VT);
   10579 
   10580   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
   10581 
   10582   // Canonicalize shuffle v, v -> v, undef
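            // For example (illustrative):
            //   (vector_shuffle V, V, <0,5,2,7>)
            //     -> (vector_shuffle V, undef, <0,1,2,3>)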
   10583   if (N0 == N1) {
   10584     SmallVector<int, 8> NewMask;
   10585     for (unsigned i = 0; i != NumElts; ++i) {
   10586       int Idx = SVN->getMaskElt(i);
   10587       if (Idx >= (int)NumElts) Idx -= NumElts;
   10588       NewMask.push_back(Idx);
   10589     }
   10590     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
   10591                                 &NewMask[0]);
   10592   }
   10593 
   10594   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
   10595   if (N0.getOpcode() == ISD::UNDEF) {
   10596     SmallVector<int, 8> NewMask;
   10597     for (unsigned i = 0; i != NumElts; ++i) {
   10598       int Idx = SVN->getMaskElt(i);
   10599       if (Idx >= 0) {
   10600         if (Idx >= (int)NumElts)
   10601           Idx -= NumElts;
   10602         else
   10603           Idx = -1; // remove reference to lhs
   10604       }
   10605       NewMask.push_back(Idx);
   10606     }
   10607     return DAG.getVectorShuffle(VT, SDLoc(N), N1, DAG.getUNDEF(VT),
   10608                                 &NewMask[0]);
   10609   }
   10610 
   10611   // Remove references to rhs if it is undef
   10612   if (N1.getOpcode() == ISD::UNDEF) {
   10613     bool Changed = false;
   10614     SmallVector<int, 8> NewMask;
   10615     for (unsigned i = 0; i != NumElts; ++i) {
   10616       int Idx = SVN->getMaskElt(i);
   10617       if (Idx >= (int)NumElts) {
   10618         Idx = -1;
   10619         Changed = true;
   10620       }
   10621       NewMask.push_back(Idx);
   10622     }
   10623     if (Changed)
   10624       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, &NewMask[0]);
   10625   }
   10626 
   10627   // If it is a splat, check if the argument vector is another splat or a
   10628   // build_vector with all scalar elements the same.
   10629   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
   10630     SDNode *V = N0.getNode();
   10631 
   10632     // If this is a bit convert that changes the element type of the vector but
   10633     // not the number of vector elements, look through it.  Be careful not to
    10634     // look through conversions that change things like v4f32 to v2f64.
   10635     if (V->getOpcode() == ISD::BITCAST) {
   10636       SDValue ConvInput = V->getOperand(0);
   10637       if (ConvInput.getValueType().isVector() &&
   10638           ConvInput.getValueType().getVectorNumElements() == NumElts)
   10639         V = ConvInput.getNode();
   10640     }
   10641 
   10642     if (V->getOpcode() == ISD::BUILD_VECTOR) {
   10643       assert(V->getNumOperands() == NumElts &&
   10644              "BUILD_VECTOR has wrong number of operands");
   10645       SDValue Base;
   10646       bool AllSame = true;
   10647       for (unsigned i = 0; i != NumElts; ++i) {
   10648         if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
   10649           Base = V->getOperand(i);
   10650           break;
   10651         }
   10652       }
   10653       // Splat of <u, u, u, u>, return <u, u, u, u>
   10654       if (!Base.getNode())
   10655         return N0;
   10656       for (unsigned i = 0; i != NumElts; ++i) {
   10657         if (V->getOperand(i) != Base) {
   10658           AllSame = false;
   10659           break;
   10660         }
   10661       }
   10662       // Splat of <x, x, x, x>, return <x, x, x, x>
   10663       if (AllSame)
   10664         return N0;
   10665     }
   10666   }
   10667 
   10668   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
   10669       Level < AfterLegalizeVectorOps &&
   10670       (N1.getOpcode() == ISD::UNDEF ||
   10671       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
   10672        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
   10673     SDValue V = partitionShuffleOfConcats(N, DAG);
   10674 
   10675     if (V.getNode())
   10676       return V;
   10677   }
   10678 
   10679   // If this shuffle node is simply a swizzle of another shuffle node,
   10680   // then try to simplify it.
   10681   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
   10682       N1.getOpcode() == ISD::UNDEF) {
   10683 
   10684     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
   10685 
   10686     // The incoming shuffle must be of the same type as the result of the
   10687     // current shuffle.
   10688     assert(OtherSV->getOperand(0).getValueType() == VT &&
   10689            "Shuffle types don't match");
   10690 
   10691     SmallVector<int, 4> Mask;
   10692     // Compute the combined shuffle mask.
   10693     for (unsigned i = 0; i != NumElts; ++i) {
   10694       int Idx = SVN->getMaskElt(i);
   10695       assert(Idx < (int)NumElts && "Index references undef operand");
   10696       // Next, this index comes from the first value, which is the incoming
   10697       // shuffle. Adopt the incoming index.
   10698       if (Idx >= 0)
   10699         Idx = OtherSV->getMaskElt(Idx);
   10700       Mask.push_back(Idx);
   10701     }
   10702 
   10703     bool CommuteOperands = false;
   10704     if (N0.getOperand(1).getOpcode() != ISD::UNDEF) {
    10705       // To be valid, the combined shuffle mask should only reference elements
    10706       // from one of the two input vectors of the inner shufflevector.
   10707       bool IsValidMask = true;
   10708       for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
    10709         // See if the combined mask only references undefs or elements coming
   10710         // from the first shufflevector operand.
   10711         IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts;
   10712 
   10713       if (!IsValidMask) {
   10714         IsValidMask = true;
   10715         for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
   10716           // Check that all the elements come from the second shuffle operand.
   10717           IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts;
   10718         CommuteOperands = IsValidMask;
   10719       }
   10720 
   10721       // Early exit if the combined shuffle mask is not valid.
   10722       if (!IsValidMask)
   10723         return SDValue();
   10724     }
   10725 
   10726     // See if this pair of shuffles can be safely folded according to either
   10727     // of the following rules:
   10728     //   shuffle(shuffle(x, y), undef) -> x
   10729     //   shuffle(shuffle(x, undef), undef) -> x
   10730     //   shuffle(shuffle(x, y), undef) -> y
   10731     bool IsIdentityMask = true;
   10732     unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0;
   10733     for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) {
   10734       // Skip Undefs.
   10735       if (Mask[i] < 0)
   10736         continue;
   10737 
   10738       // The combined shuffle must map each index to itself.
   10739       IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex;
   10740     }
   10741 
   10742     if (IsIdentityMask) {
   10743       if (CommuteOperands)
   10744         // optimize shuffle(shuffle(x, y), undef) -> y.
   10745         return OtherSV->getOperand(1);
   10746 
   10747       // optimize shuffle(shuffle(x, undef), undef) -> x
   10748       // optimize shuffle(shuffle(x, y), undef) -> x
   10749       return OtherSV->getOperand(0);
   10750     }
   10751 
   10752     // It may still be beneficial to combine the two shuffles if the
   10753     // resulting shuffle is legal.
   10754     if (TLI.isShuffleMaskLegal(Mask, VT)) {
   10755       if (!CommuteOperands)
   10756         // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
   10757         // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3)
   10758         return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
   10759                                     &Mask[0]);
   10760 
   10761       //   shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3)
   10762       return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1),
   10763                                   &Mask[0]);
   10764     }
   10765   }
   10766 
   10767   return SDValue();
   10768 }
   10769 
   10770 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
   10771   SDValue N0 = N->getOperand(0);
   10772   SDValue N2 = N->getOperand(2);
   10773 
   10774   // If the input vector is a concatenation, and the insert replaces
   10775   // one of the halves, we can optimize into a single concat_vectors.
   10776   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
   10777       N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) {
   10778     APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
   10779     EVT VT = N->getValueType(0);
   10780 
   10781     // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
   10782     // (concat_vectors Z, Y)
   10783     if (InsIdx == 0)
   10784       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
   10785                          N->getOperand(1), N0.getOperand(1));
   10786 
   10787     // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
   10788     // (concat_vectors X, Z)
   10789     if (InsIdx == VT.getVectorNumElements()/2)
   10790       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
   10791                          N0.getOperand(0), N->getOperand(1));
   10792   }
   10793 
   10794   return SDValue();
   10795 }
   10796 
    10797 /// XformToShuffleWithZero - Returns a vector_shuffle if it is able to transform
   10798 /// an AND to a vector_shuffle with the destination vector and a zero vector.
   10799 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
   10800 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
   10801 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
   10802   EVT VT = N->getValueType(0);
   10803   SDLoc dl(N);
   10804   SDValue LHS = N->getOperand(0);
   10805   SDValue RHS = N->getOperand(1);
   10806   if (N->getOpcode() == ISD::AND) {
   10807     if (RHS.getOpcode() == ISD::BITCAST)
   10808       RHS = RHS.getOperand(0);
   10809     if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
   10810       SmallVector<int, 8> Indices;
   10811       unsigned NumElts = RHS.getNumOperands();
   10812       for (unsigned i = 0; i != NumElts; ++i) {
   10813         SDValue Elt = RHS.getOperand(i);
   10814         if (!isa<ConstantSDNode>(Elt))
   10815           return SDValue();
   10816 
   10817         if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
   10818           Indices.push_back(i);
   10819         else if (cast<ConstantSDNode>(Elt)->isNullValue())
   10820           Indices.push_back(NumElts);
   10821         else
   10822           return SDValue();
   10823       }
   10824 
   10825       // Let's see if the target supports this vector_shuffle.
   10826       EVT RVT = RHS.getValueType();
   10827       if (!TLI.isVectorClearMaskLegal(Indices, RVT))
   10828         return SDValue();
   10829 
   10830       // Return the new VECTOR_SHUFFLE node.
   10831       EVT EltVT = RVT.getVectorElementType();
   10832       SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
   10833                                      DAG.getConstant(0, EltVT));
   10834       SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps);
   10835       LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
   10836       SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
   10837       return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
   10838     }
   10839   }
   10840 
   10841   return SDValue();
   10842 }
   10843 
   10844 /// SimplifyVBinOp - Visit a binary vector operation, like ADD.
   10845 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
   10846   assert(N->getValueType(0).isVector() &&
   10847          "SimplifyVBinOp only works on vectors!");
   10848 
   10849   SDValue LHS = N->getOperand(0);
   10850   SDValue RHS = N->getOperand(1);
   10851   SDValue Shuffle = XformToShuffleWithZero(N);
   10852   if (Shuffle.getNode()) return Shuffle;
   10853 
   10854   // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
   10855   // this operation.
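            // For example (illustrative):
            //   (add (build_vector 1, 2), (build_vector 3, 4)) -> (build_vector 4, 6)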
   10856   if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
   10857       RHS.getOpcode() == ISD::BUILD_VECTOR) {
   10858     // Check if both vectors are constants. If not bail out.
   10859     if (!(cast<BuildVectorSDNode>(LHS)->isConstant() &&
   10860           cast<BuildVectorSDNode>(RHS)->isConstant()))
   10861       return SDValue();
   10862 
   10863     SmallVector<SDValue, 8> Ops;
   10864     for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
   10865       SDValue LHSOp = LHS.getOperand(i);
   10866       SDValue RHSOp = RHS.getOperand(i);
   10867 
   10868       // Can't fold divide by zero.
   10869       if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
   10870           N->getOpcode() == ISD::FDIV) {
   10871         if ((RHSOp.getOpcode() == ISD::Constant &&
   10872              cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
   10873             (RHSOp.getOpcode() == ISD::ConstantFP &&
   10874              cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
   10875           break;
   10876       }
   10877 
   10878       EVT VT = LHSOp.getValueType();
   10879       EVT RVT = RHSOp.getValueType();
   10880       if (RVT != VT) {
   10881         // Integer BUILD_VECTOR operands may have types larger than the element
   10882         // size (e.g., when the element type is not legal).  Prior to type
   10883         // legalization, the types may not match between the two BUILD_VECTORS.
   10884         // Truncate one of the operands to make them match.
   10885         if (RVT.getSizeInBits() > VT.getSizeInBits()) {
   10886           RHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, RHSOp);
   10887         } else {
   10888           LHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), RVT, LHSOp);
   10889           VT = RVT;
   10890         }
   10891       }
   10892       SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT,
   10893                                    LHSOp, RHSOp);
   10894       if (FoldOp.getOpcode() != ISD::UNDEF &&
   10895           FoldOp.getOpcode() != ISD::Constant &&
   10896           FoldOp.getOpcode() != ISD::ConstantFP)
   10897         break;
   10898       Ops.push_back(FoldOp);
   10899       AddToWorkList(FoldOp.getNode());
   10900     }
   10901 
   10902     if (Ops.size() == LHS.getNumOperands())
   10903       return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops);
   10904   }
   10905 
   10906   // Type legalization might introduce new shuffles in the DAG.
   10907   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
   10908   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
   10909   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
   10910       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
   10911       LHS.getOperand(1).getOpcode() == ISD::UNDEF &&
   10912       RHS.getOperand(1).getOpcode() == ISD::UNDEF) {
   10913     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
   10914     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
   10915 
   10916     if (SVN0->getMask().equals(SVN1->getMask())) {
   10917       EVT VT = N->getValueType(0);
   10918       SDValue UndefVector = LHS.getOperand(1);
   10919       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
   10920                                      LHS.getOperand(0), RHS.getOperand(0));
   10921       AddUsersToWorkList(N);
   10922       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
   10923                                   &SVN0->getMask()[0]);
   10924     }
   10925   }
   10926 
   10927   return SDValue();
   10928 }
   10929 
    10930 /// SimplifyVUnaryOp - Visit a unary vector operation, like FABS/FNEG.
   10931 SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
   10932   assert(N->getValueType(0).isVector() &&
   10933          "SimplifyVUnaryOp only works on vectors!");
   10934 
   10935   SDValue N0 = N->getOperand(0);
   10936 
   10937   if (N0.getOpcode() != ISD::BUILD_VECTOR)
   10938     return SDValue();
   10939 
   10940   // Operand is a BUILD_VECTOR node, see if we can constant fold it.
   10941   SmallVector<SDValue, 8> Ops;
   10942   for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
   10943     SDValue Op = N0.getOperand(i);
   10944     if (Op.getOpcode() != ISD::UNDEF &&
   10945         Op.getOpcode() != ISD::ConstantFP)
   10946       break;
   10947     EVT EltVT = Op.getValueType();
   10948     SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(N0), EltVT, Op);
   10949     if (FoldOp.getOpcode() != ISD::UNDEF &&
   10950         FoldOp.getOpcode() != ISD::ConstantFP)
   10951       break;
   10952     Ops.push_back(FoldOp);
   10953     AddToWorkList(FoldOp.getNode());
   10954   }
   10955 
   10956   if (Ops.size() != N0.getNumOperands())
   10957     return SDValue();
   10958 
   10959   return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), Ops);
   10960 }
   10961 
   10962 SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
   10963                                     SDValue N1, SDValue N2){
   10964   assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
   10965 
   10966   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
   10967                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
   10968 
   10969   // If we got a simplified select_cc node back from SimplifySelectCC, then
   10970   // break it down into a new SETCC node, and a new SELECT node, and then return
   10971   // the SELECT node, since we were called with a SELECT node.
   10972   if (SCC.getNode()) {
   10973     // Check to see if we got a select_cc back (to turn into setcc/select).
   10974     // Otherwise, just return whatever node we got back, like fabs.
   10975     if (SCC.getOpcode() == ISD::SELECT_CC) {
   10976       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
   10977                                   N0.getValueType(),
   10978                                   SCC.getOperand(0), SCC.getOperand(1),
   10979                                   SCC.getOperand(4));
   10980       AddToWorkList(SETCC.getNode());
   10981       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(),
   10982                            SCC.getOperand(2), SCC.getOperand(3), SETCC);
   10983     }
   10984 
   10985     return SCC;
   10986   }
   10987   return SDValue();
   10988 }
   10989 
   10990 /// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
   10991 /// are the two values being selected between, see if we can simplify the
   10992 /// select.  Callers of this should assume that TheSelect is deleted if this
   10993 /// returns true.  As such, they should return the appropriate thing (e.g. the
   10994 /// node) back to the top-level of the DAG combiner loop to avoid it being
   10995 /// looked at.
   10996 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
   10997                                     SDValue RHS) {
   10998 
   10999   // Cannot simplify select with vector condition
   11000   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
   11001 
   11002   // If this is a select from two identical things, try to pull the operation
   11003   // through the select.
   11004   if (LHS.getOpcode() != RHS.getOpcode() ||
   11005       !LHS.hasOneUse() || !RHS.hasOneUse())
   11006     return false;
   11007 
   11008   // If this is a load and the token chain is identical, replace the select
   11009   // of two loads with a load through a select of the address to load from.
   11010   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
   11011   // constants have been dropped into the constant pool.
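           // In other words, (select Cond, (load A), (load B)) becomes
           // (load (select Cond, A, B)) when both loads share the same chain, are
           // not volatile, and the fold cannot create a cycle through the
           // condition (plus the other legality checks below).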
   11012   if (LHS.getOpcode() == ISD::LOAD) {
   11013     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
   11014     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
   11015 
   11016     // Token chains must be identical.
   11017     if (LHS.getOperand(0) != RHS.getOperand(0) ||
   11018         // Do not let this transformation reduce the number of volatile loads.
   11019         LLD->isVolatile() || RLD->isVolatile() ||
   11020         // If this is an EXTLOAD, the VT's must match.
   11021         LLD->getMemoryVT() != RLD->getMemoryVT() ||
   11022         // If this is an EXTLOAD, the kind of extension must match.
   11023         (LLD->getExtensionType() != RLD->getExtensionType() &&
   11024          // The only exception is if one of the extensions is anyext.
   11025          LLD->getExtensionType() != ISD::EXTLOAD &&
   11026          RLD->getExtensionType() != ISD::EXTLOAD) ||
   11027         // FIXME: this discards src value information.  This is
   11028         // over-conservative. It would be beneficial to be able to remember
   11029         // both potential memory locations.  Since we are discarding
   11030         // src value info, don't do the transformation if the memory
   11031         // locations are not in the default address space.
   11032         LLD->getPointerInfo().getAddrSpace() != 0 ||
   11033         RLD->getPointerInfo().getAddrSpace() != 0 ||
   11034         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
   11035                                       LLD->getBasePtr().getValueType()))
   11036       return false;
   11037 
   11038     // Check that the select condition doesn't reach either load.  If so,
   11039     // folding this will induce a cycle into the DAG.  If not, this is safe to
   11040     // xform, so create a select of the addresses.
   11041     SDValue Addr;
   11042     if (TheSelect->getOpcode() == ISD::SELECT) {
   11043       SDNode *CondNode = TheSelect->getOperand(0).getNode();
   11044       if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
   11045           (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
   11046         return false;
   11047       // The loads must not depend on one another.
   11048       if (LLD->isPredecessorOf(RLD) ||
   11049           RLD->isPredecessorOf(LLD))
   11050         return false;
   11051       Addr = DAG.getSelect(SDLoc(TheSelect),
   11052                            LLD->getBasePtr().getValueType(),
   11053                            TheSelect->getOperand(0), LLD->getBasePtr(),
   11054                            RLD->getBasePtr());
   11055     } else {  // Otherwise SELECT_CC
   11056       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
   11057       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
   11058 
   11059       if ((LLD->hasAnyUseOfValue(1) &&
   11060            (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
   11061           (RLD->hasAnyUseOfValue(1) &&
   11062            (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
   11063         return false;
   11064 
   11065       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
   11066                          LLD->getBasePtr().getValueType(),
   11067                          TheSelect->getOperand(0),
   11068                          TheSelect->getOperand(1),
   11069                          LLD->getBasePtr(), RLD->getBasePtr(),
   11070                          TheSelect->getOperand(4));
   11071     }
   11072 
   11073     SDValue Load;
   11074     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
   11075       Load = DAG.getLoad(TheSelect->getValueType(0),
   11076                          SDLoc(TheSelect),
   11077                          // FIXME: Discards pointer and TBAA info.
   11078                          LLD->getChain(), Addr, MachinePointerInfo(),
   11079                          LLD->isVolatile(), LLD->isNonTemporal(),
   11080                          LLD->isInvariant(), LLD->getAlignment());
   11081     } else {
   11082       Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
   11083                             RLD->getExtensionType() : LLD->getExtensionType(),
   11084                             SDLoc(TheSelect),
   11085                             TheSelect->getValueType(0),
   11086                             // FIXME: Discards pointer and TBAA info.
   11087                             LLD->getChain(), Addr, MachinePointerInfo(),
   11088                             LLD->getMemoryVT(), LLD->isVolatile(),
   11089                             LLD->isNonTemporal(), LLD->getAlignment());
   11090     }
   11091 
   11092     // Users of the select now use the result of the load.
   11093     CombineTo(TheSelect, Load);
   11094 
   11095     // Users of the old loads now use the new load's chain.  We know the
   11096     // old-load value is dead now.
   11097     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
   11098     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
   11099     return true;
   11100   }
   11101 
   11102   return false;
   11103 }
   11104 
   11105 /// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
   11106 /// where 'cond' is the comparison specified by CC.
   11107 SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
   11108                                       SDValue N2, SDValue N3,
   11109                                       ISD::CondCode CC, bool NotExtCompare) {
   11110   // (x ? y : y) -> y.
   11111   if (N2 == N3) return N2;
   11112 
   11113   EVT VT = N2.getValueType();
   11114   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
   11115   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
   11116   ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
   11117 
   11118   // Determine if the condition we're dealing with is constant
   11119   SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
   11120                               N0, N1, CC, DL, false);
   11121   if (SCC.getNode()) AddToWorkList(SCC.getNode());
   11122   ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
   11123 
   11124   // fold select_cc true, x, y -> x
   11125   if (SCCC && !SCCC->isNullValue())
   11126     return N2;
   11127   // fold select_cc false, x, y -> y
   11128   if (SCCC && SCCC->isNullValue())
   11129     return N3;
   11130 
   11131   // Check to see if we can simplify the select into an fabs node
   11132   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
   11133     // Allow either -0.0 or 0.0
   11134     if (CFP->getValueAPF().isZero()) {
   11135       // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
   11136       if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
   11137           N0 == N2 && N3.getOpcode() == ISD::FNEG &&
   11138           N2 == N3.getOperand(0))
   11139         return DAG.getNode(ISD::FABS, DL, VT, N0);
   11140 
   11141       // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
   11142       if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
   11143           N0 == N3 && N2.getOpcode() == ISD::FNEG &&
   11144           N2.getOperand(0) == N3)
   11145         return DAG.getNode(ISD::FABS, DL, VT, N3);
   11146     }
   11147   }
   11148 
   11149   // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
   11150   // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
   11151   // in it.  This is a win when the constant is not otherwise available because
   11152   // it replaces two constant pool loads with one.  We only do this if the FP
   11153   // type is known to be legal, because if it isn't, then we are before legalize
   11154   // types and we want the other legalization to happen first (e.g. to avoid
   11155   // messing with soft float) and if the ConstantFP is not legal, because if
   11156   // it is legal, we may not need to store the FP constant in a constant pool.
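           // Concretely, the sequence built below is roughly:
           //   CP  = constant pool entry holding [FV, TV]
           //   Off = (N0 cond N1) ? sizeof(element) : 0
           //   result = load (CP + Off)
           // FV is stored at index 0 and TV at index 1, so a true condition
           // selects TV.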
   11157   if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
   11158     if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
   11159       if (TLI.isTypeLegal(N2.getValueType()) &&
   11160           (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
   11161                TargetLowering::Legal &&
   11162            !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
   11163            !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
   11164           // If both constants have multiple uses, then we won't need to do an
   11165           // extra load, they are likely around in registers for other users.
   11166           (TV->hasOneUse() || FV->hasOneUse())) {
   11167         Constant *Elts[] = {
   11168           const_cast<ConstantFP*>(FV->getConstantFPValue()),
   11169           const_cast<ConstantFP*>(TV->getConstantFPValue())
   11170         };
   11171         Type *FPTy = Elts[0]->getType();
   11172         const DataLayout &TD = *TLI.getDataLayout();
   11173 
   11174         // Create a ConstantArray of the two constants.
   11175         Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
   11176         SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
   11177                                             TD.getPrefTypeAlignment(FPTy));
   11178         unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
   11179 
   11180         // Get the offsets to the 0 and 1 element of the array so that we can
   11181         // select between them.
   11182         SDValue Zero = DAG.getIntPtrConstant(0);
   11183         unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
   11184         SDValue One = DAG.getIntPtrConstant(EltSize);
   11185 
   11186         SDValue Cond = DAG.getSetCC(DL,
   11187                                     getSetCCResultType(N0.getValueType()),
   11188                                     N0, N1, CC);
   11189         AddToWorkList(Cond.getNode());
   11190         SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
   11191                                           Cond, One, Zero);
   11192         AddToWorkList(CstOffset.getNode());
   11193         CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
   11194                             CstOffset);
   11195         AddToWorkList(CPIdx.getNode());
   11196         return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
   11197                            MachinePointerInfo::getConstantPool(), false,
   11198                            false, false, Alignment);
   11199 
   11200       }
   11201     }
   11202 
   11203   // Check to see if we can perform the "gzip trick", transforming
   11204   // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1)), A)
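           // For example, with i32 X: (select_cc setlt X, 0, A, 0) becomes
           // (and (sra X, 31), A); the arithmetic shift smears the sign bit, so
           // the mask is all-ones exactly when X < 0 and zero otherwise.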
   11205   if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
   11206       (N1C->isNullValue() ||                         // (a < 0) ? b : 0
   11207        (N1C->getAPIntValue() == 1 && N0 == N2))) {   // (a < 1) ? a : 0
   11208     EVT XType = N0.getValueType();
   11209     EVT AType = N2.getValueType();
   11210     if (XType.bitsGE(AType)) {
   11211       // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a
   11212       // single-bit constant.
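               // For example, with i32 X and A == 0x40000000 (only bit 30 set),
               // the shift amount is 32-30-1 == 1, so we emit (and (srl X, 1), A):
               // the sign bit lands on bit 30, and the AND keeps it only when X < 0.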
   11213       if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
   11214         unsigned ShCtV = N2C->getAPIntValue().logBase2();
   11215         ShCtV = XType.getSizeInBits()-ShCtV-1;
   11216         SDValue ShCt = DAG.getConstant(ShCtV,
   11217                                        getShiftAmountTy(N0.getValueType()));
   11218         SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
   11219                                     XType, N0, ShCt);
   11220         AddToWorkList(Shift.getNode());
   11221 
   11222         if (XType.bitsGT(AType)) {
   11223           Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
   11224           AddToWorkList(Shift.getNode());
   11225         }
   11226 
   11227         return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
   11228       }
   11229 
   11230       SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0),
   11231                                   XType, N0,
   11232                                   DAG.getConstant(XType.getSizeInBits()-1,
   11233                                          getShiftAmountTy(N0.getValueType())));
   11234       AddToWorkList(Shift.getNode());
   11235 
   11236       if (XType.bitsGT(AType)) {
   11237         Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
   11238         AddToWorkList(Shift.getNode());
   11239       }
   11240 
   11241       return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
   11242     }
   11243   }
   11244 
   11245   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
   11246   // where y has a single bit set.
   11247   // In plain terms: we can turn the SELECT_CC into an AND
   11248   // when the condition can be materialized as an all-ones register.  Any
   11249   // single bit-test can be materialized as an all-ones register with
   11250   // shift-left and shift-right-arith.
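           // For example, (select_cc seteq (and X, 4), 0, 0, A) with i32 X becomes
           // (and (sra (shl X, 29), 31), A): the shl moves bit 2 into the sign
           // position and the sra turns it into an all-ones or all-zeros mask.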
   11251   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
   11252       N0->getValueType(0) == VT &&
   11253       N1C && N1C->isNullValue() &&
   11254       N2C && N2C->isNullValue()) {
   11255     SDValue AndLHS = N0->getOperand(0);
   11256     ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
   11257     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
   11258       // Shift the tested bit over the sign bit.
   11259       APInt AndMask = ConstAndRHS->getAPIntValue();
   11260       SDValue ShlAmt =
   11261         DAG.getConstant(AndMask.countLeadingZeros(),
   11262                         getShiftAmountTy(AndLHS.getValueType()));
   11263       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
   11264 
   11265       // Now arithmetic right shift it all the way over, so the result is either
   11266       // all-ones, or zero.
   11267       SDValue ShrAmt =
   11268         DAG.getConstant(AndMask.getBitWidth()-1,
   11269                         getShiftAmountTy(Shl.getValueType()));
   11270       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
   11271 
   11272       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
   11273     }
   11274   }
   11275 
   11276   // fold select C, 16, 0 -> shl C, 4
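           // This relies on ZeroOrOneBooleanContent: the setcc result is exactly
           // 0 or 1, so zero-extending it and shifting left by log2(16) == 4
           // yields 16 or 0.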
   11277   if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
   11278       TLI.getBooleanContents(N0.getValueType()) ==
   11279           TargetLowering::ZeroOrOneBooleanContent) {
   11280 
   11281     // If the caller doesn't want us to simplify this into a zext of a compare,
   11282     // don't do it.
   11283     if (NotExtCompare && N2C->getAPIntValue() == 1)
   11284       return SDValue();
   11285 
   11286     // Get a SetCC of the condition
   11287     // NOTE: Don't create a SETCC if it's not legal on this target.
   11288     if (!LegalOperations ||
   11289         TLI.isOperationLegal(ISD::SETCC,
   11290           LegalTypes ? getSetCCResultType(N0.getValueType()) : MVT::i1)) {
   11291       SDValue Temp, SCC;
   11292       // cast from setcc result type to select result type
   11293       if (LegalTypes) {
   11294         SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
   11295                             N0, N1, CC);
   11296         if (N2.getValueType().bitsLT(SCC.getValueType()))
   11297           Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
   11298                                         N2.getValueType());
   11299         else
   11300           Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
   11301                              N2.getValueType(), SCC);
   11302       } else {
   11303         SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
   11304         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
   11305                            N2.getValueType(), SCC);
   11306       }
   11307 
   11308       AddToWorkList(SCC.getNode());
   11309       AddToWorkList(Temp.getNode());
   11310 
   11311       if (N2C->getAPIntValue() == 1)
   11312         return Temp;
   11313 
   11314       // shl setcc result by log2 n2c
   11315       return DAG.getNode(
   11316           ISD::SHL, DL, N2.getValueType(), Temp,
   11317           DAG.getConstant(N2C->getAPIntValue().logBase2(),
   11318                           getShiftAmountTy(Temp.getValueType())));
   11319     }
   11320   }
   11321 
   11322   // Check to see if this is the equivalent of setcc
   11323   // FIXME: Turn all of these into setcc if setcc is legal;
   11324   // otherwise, go ahead with the folds.
   11325   if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
   11326     EVT XType = N0.getValueType();
   11327     if (!LegalOperations ||
   11328         TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) {
   11329       SDValue Res = DAG.getSetCC(DL, getSetCCResultType(XType), N0, N1, CC);
   11330       if (Res.getValueType() != VT)
   11331         Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
   11332       return Res;
   11333     }
   11334 
   11335     // fold (seteq X, 0) -> (srl (ctlz X), log2(size(X)))
   11336     if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
   11337         (!LegalOperations ||
   11338          TLI.isOperationLegal(ISD::CTLZ, XType))) {
   11339       SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0);
   11340       return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
   11341                          DAG.getConstant(Log2_32(XType.getSizeInBits()),
   11342                                        getShiftAmountTy(Ctlz.getValueType())));
   11343     }
   11344     // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
   11345     if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
   11346       SDValue NegN0 = DAG.getNode(ISD::SUB, SDLoc(N0),
   11347                                   XType, DAG.getConstant(0, XType), N0);
   11348       SDValue NotN0 = DAG.getNOT(SDLoc(N0), N0, XType);
   11349       return DAG.getNode(ISD::SRL, DL, XType,
   11350                          DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
   11351                          DAG.getConstant(XType.getSizeInBits()-1,
   11352                                          getShiftAmountTy(XType)));
   11353     }
   11354     // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
   11355     if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
   11356       SDValue Sign = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0,
   11357                                  DAG.getConstant(XType.getSizeInBits()-1,
   11358                                          getShiftAmountTy(N0.getValueType())));
   11359       return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
   11360     }
   11361   }
   11362 
   11363   // Check to see if this is an integer abs.
   11364   // select_cc setg[te] X,  0,  X, -X ->
   11365   // select_cc setgt    X, -1,  X, -X ->
   11366   // select_cc setl[te] X,  0, -X,  X ->
   11367   // select_cc setlt    X,  1, -X,  X ->
   11368   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
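           // For example, on i32: Y = sra(X, 31) is 0 when X >= 0 and -1 when
           // X < 0, so (X + Y) ^ Y is X when X >= 0 and ~(X - 1) == -X when X < 0,
           // i.e. |X| (with the usual wrap-around for INT_MIN).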
   11369   if (N1C) {
   11370     ConstantSDNode *SubC = nullptr;
   11371     if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
   11372          (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
   11373         N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
   11374       SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
   11375     else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
   11376               (N1C->isOne() && CC == ISD::SETLT)) &&
   11377              N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
   11378       SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
   11379 
   11380     EVT XType = N0.getValueType();
   11381     if (SubC && SubC->isNullValue() && XType.isInteger()) {
   11382       SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType,
   11383                                   N0,
   11384                                   DAG.getConstant(XType.getSizeInBits()-1,
   11385                                          getShiftAmountTy(N0.getValueType())));
   11386       SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0),
   11387                                 XType, N0, Shift);
   11388       AddToWorkList(Shift.getNode());
   11389       AddToWorkList(Add.getNode());
   11390       return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
   11391     }
   11392   }
   11393 
   11394   return SDValue();
   11395 }
   11396 
   11397 /// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
   11398 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
   11399                                    SDValue N1, ISD::CondCode Cond,
   11400                                    SDLoc DL, bool foldBooleans) {
   11401   TargetLowering::DAGCombinerInfo
   11402     DagCombineInfo(DAG, Level, false, this);
   11403   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
   11404 }
   11405 
   11406 /// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant,
   11407 /// return a DAG expression that computes the same value by multiplying by
   11408 /// a magic number.  See:
   11409 /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
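         ///
         /// For example, a signed i32 divide by 3 is typically lowered as roughly
         ///   Q = mulhs(X, 0x55555556);  Q = Q + (X >>u 31);
         /// the exact magic constant and fix-up sequence come from TLI.BuildSDIV.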
   11410 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
   11411   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
   11412   if (!C)
   11413     return SDValue();
   11414 
   11415   // Avoid division by zero.
   11416   if (!C->getAPIntValue())
   11417     return SDValue();
   11418 
   11419   std::vector<SDNode*> Built;
   11420   SDValue S =
   11421       TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
   11422 
   11423   for (SDNode *N : Built)
   11424     AddToWorkList(N);
   11425   return S;
   11426 }
   11427 
   11428 /// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
   11429 /// return a DAG expression that computes the same value by multiplying by
   11430 /// a magic number.  See:
   11431 /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
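         ///
         /// For example, an unsigned i32 divide by 3 is typically lowered as roughly
         ///   Q = mulhu(X, 0xAAAAAAAB) >> 1;
         /// the exact magic constant and post-shift come from TLI.BuildUDIV.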
   11432 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
   11433   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
   11434   if (!C)
   11435     return SDValue();
   11436 
   11437   // Avoid division by zero.
   11438   if (!C->getAPIntValue())
   11439     return SDValue();
   11440 
   11441   std::vector<SDNode*> Built;
   11442   SDValue S =
   11443       TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
   11444 
   11445   for (SDNode *N : Built)
   11446     AddToWorkList(N);
   11447   return S;
   11448 }
   11449 
   11450 /// FindBaseOffset - Return true if base is a frame index, which is known not
   11451 /// to alias with anything but itself.  Provides base object and offset as
   11452 /// results.
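         /// For example, (add (FrameIndex fi#1), 16) yields Base = FrameIndex fi#1
         /// and Offset = 16 and returns true; a GlobalAddress or ConstantPool base
         /// fills in GV or CV instead and returns false.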
   11453 static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
   11454                            const GlobalValue *&GV, const void *&CV) {
   11455   // Assume it is a primitive operation.
   11456   Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
   11457 
   11458   // If it's adding a simple constant, integrate the offset.
   11459   if (Base.getOpcode() == ISD::ADD) {
   11460     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
   11461       Base = Base.getOperand(0);
   11462       Offset += C->getZExtValue();
   11463     }
   11464   }
   11465 
   11466   // Return the underlying GlobalValue, and update the Offset.  Return false
   11467   // for GlobalAddressSDNode since the same GlobalAddress may be represented
   11468   // by multiple nodes with different offsets.
   11469   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
   11470     GV = G->getGlobal();
   11471     Offset += G->getOffset();
   11472     return false;
   11473   }
   11474 
   11475   // Return the underlying Constant value, and update the Offset.  Return false
   11476   // for ConstantSDNodes since the same constant pool entry may be represented
   11477   // by multiple nodes with different offsets.
   11478   if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
   11479     CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
   11480                                          : (const void *)C->getConstVal();
   11481     Offset += C->getOffset();
   11482     return false;
   11483   }
   11484   // Only a frame index is known not to alias with anything but itself.
   11485   return isa<FrameIndexSDNode>(Base);
   11486 }
   11487 
   11488 /// isAlias - Return true if there is any possibility that the two addresses
   11489 /// overlap.
   11490 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
   11491   // If they are the same then they must be aliases.
   11492   if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
   11493 
   11494   // If they are both volatile then they cannot be reordered.
   11495   if (Op0->isVolatile() && Op1->isVolatile()) return true;
   11496 
   11497   // Gather base node and offset information.
   11498   SDValue Base1, Base2;
   11499   int64_t Offset1, Offset2;
   11500   const GlobalValue *GV1, *GV2;
   11501   const void *CV1, *CV2;
   11502   bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(),
   11503                                       Base1, Offset1, GV1, CV1);
   11504   bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(),
   11505                                       Base2, Offset2, GV2, CV2);
   11506 
   11507   // If they have the same base address, check to see if they overlap.
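           // The accesses [Offset1, Offset1 + Size1) and [Offset2, Offset2 + Size2)
           // are disjoint only if one ends at or before the other begins; otherwise
           // conservatively report that they may overlap.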
   11508   if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
   11509     return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
   11510              (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
   11511 
   11512   // It is possible for different frame indices to alias each other, mostly
   11513   // when tail call optimization reuses return address slots for arguments.
   11514   // To catch this case, look up the actual index of frame indices to compute
   11515   // the real alias relationship.
   11516   if (isFrameIndex1 && isFrameIndex2) {
   11517     MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
   11518     Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
   11519     Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
   11520     return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
   11521              (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
   11522   }
   11523 
   11524   // Otherwise, if we know what the bases are, and they aren't identical, then
   11525   // we know they cannot alias.
   11526   if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
   11527     return false;
   11528 
   11529   // If SrcValue1 and SrcValue2 are known to have relatively large alignment
   11530   // compared to the size and offset of the accesses, we may be able to prove
   11531   // that they do not alias.  This check is conservative for now to catch cases
   11532   // created by splitting vector types.
   11533   if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) &&
   11534       (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) &&
   11535       (Op0->getMemoryVT().getSizeInBits() >> 3 ==
   11536        Op1->getMemoryVT().getSizeInBits() >> 3) &&
   11537       (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) {
   11538     int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment();
   11539     int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment();
   11540 
   11541     // There is no overlap between these relatively aligned accesses of similar
   11542     // size, return no alias.
   11543     if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 ||
   11544         (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1)
   11545       return false;
   11546   }
   11547 
   11548   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA :
   11549     TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
   11550 #ifndef NDEBUG
   11551   if (CombinerAAOnlyFunc.getNumOccurrences() &&
   11552       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
   11553     UseAA = false;
   11554 #endif
   11555   if (UseAA &&
   11556       Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
   11557     // Use alias analysis information.
   11558     int64_t MinOffset = std::min(Op0->getSrcValueOffset(),
   11559                                  Op1->getSrcValueOffset());
   11560     int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) +
   11561         Op0->getSrcValueOffset() - MinOffset;
   11562     int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
   11563         Op1->getSrcValueOffset() - MinOffset;
   11564     AliasAnalysis::AliasResult AAResult =
   11565         AA.alias(AliasAnalysis::Location(Op0->getMemOperand()->getValue(),
   11566                                          Overlap1,
   11567                                          UseTBAA ? Op0->getTBAAInfo() : nullptr),
   11568                  AliasAnalysis::Location(Op1->getMemOperand()->getValue(),
   11569                                          Overlap2,
   11570                                          UseTBAA ? Op1->getTBAAInfo() : nullptr));
   11571     if (AAResult == AliasAnalysis::NoAlias)
   11572       return false;
   11573   }
   11574 
   11575   // Otherwise we have to assume they alias.
   11576   return true;
   11577 }
   11578 
   11579 /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
   11580 /// looking for aliasing nodes and adding them to the Aliases vector.
   11581 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
   11582                                    SmallVectorImpl<SDValue> &Aliases) {
   11583   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
   11584   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
   11585 
   11586   // Get alias information for node.
   11587   bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
   11588 
   11589   // Starting off.
   11590   Chains.push_back(OriginalChain);
   11591   unsigned Depth = 0;
   11592 
   11593   // Look at each chain and determine if it is an alias.  If so, add it to the
   11594   // aliases list.  If not, then continue up the chain looking for the next
   11595   // candidate.
   11596   while (!Chains.empty()) {
   11597     SDValue Chain = Chains.back();
   11598     Chains.pop_back();
   11599 
   11600     // For TokenFactor nodes, look at each operand and only continue up the
   11601     // chain until we find two aliases.  If we've seen two aliases, assume we'll
   11602     // find more and revert to original chain since the xform is unlikely to be
   11603     // profitable.
   11604     //
   11605     // FIXME: The depth check could be made to return the last non-aliasing
   11606     // chain we found before we hit a tokenfactor rather than the original
   11607     // chain.
   11608     if (Depth > 6 || Aliases.size() == 2) {
   11609       Aliases.clear();
   11610       Aliases.push_back(OriginalChain);
   11611       return;
   11612     }
   11613 
   11614     // Don't bother if we've been here before.
   11615     if (!Visited.insert(Chain.getNode()))
   11616       continue;
   11617 
   11618     switch (Chain.getOpcode()) {
   11619     case ISD::EntryToken:
   11620       // Entry token is ideal chain operand, but handled in FindBetterChain.
   11621       break;
   11622 
   11623     case ISD::LOAD:
   11624     case ISD::STORE: {
   11625       // Get alias information for Chain.
   11626       bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
   11627           !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
   11628 
   11629       // If chain is alias then stop here.
   11630       if (!(IsLoad && IsOpLoad) &&
   11631           isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
   11632         Aliases.push_back(Chain);
   11633       } else {
   11634         // Look further up the chain.
   11635         Chains.push_back(Chain.getOperand(0));
   11636         ++Depth;
   11637       }
   11638       break;
   11639     }
   11640 
   11641     case ISD::TokenFactor:
   11642       // We have to check each of the operands of the token factor for "small"
   11643       // token factors, so we queue them up.  Adding the operands to the queue
   11644       // (stack) in reverse order maintains the original order and increases the
   11645       // likelihood that getNode will find a matching token factor (CSE.)
   11646       if (Chain.getNumOperands() > 16) {
   11647         Aliases.push_back(Chain);
   11648         break;
   11649       }
   11650       for (unsigned n = Chain.getNumOperands(); n;)
   11651         Chains.push_back(Chain.getOperand(--n));
   11652       ++Depth;
   11653       break;
   11654 
   11655     default:
   11656       // For all other instructions we will just have to take what we can get.
   11657       Aliases.push_back(Chain);
   11658       break;
   11659     }
   11660   }
   11661 
   11662   // We need to be careful here to also search for aliases through the
   11663   // value operand of a store, etc. Consider the following situation:
   11664   //   Token1 = ...
   11665   //   L1 = load Token1, %52
   11666   //   S1 = store Token1, L1, %51
   11667   //   L2 = load Token1, %52+8
   11668   //   S2 = store Token1, L2, %51+8
   11669   //   Token2 = Token(S1, S2)
   11670   //   L3 = load Token2, %53
   11671   //   S3 = store Token2, L3, %52
   11672   //   L4 = load Token2, %53+8
   11673   //   S4 = store Token2, L4, %52+8
   11674   // If we search for aliases of S3 (which loads address %52), and we look
   11675   // only through the chain, then we'll miss the trivial dependence on L1
   11676   // (which also loads from %52). We then might change all loads and
   11677   // stores to use Token1 as their chain operand, which could result in
   11678   // copying %53 into %52 before copying %52 into %51 (which should
   11679   // happen first).
   11680   //
   11681   // The problem is, however, that searching for such data dependencies
   11682   // can become expensive, and the cost is not directly related to the
   11683   // chain depth. Instead, we'll rule out such configurations here by
   11684   // insisting that we've visited all chain users (except for users
   11685   // of the original chain, which is not necessary). When doing this,
   11686   // we need to look through nodes we don't care about (otherwise, things
   11687   // like register copies will interfere with trivial cases).
   11688 
   11689   SmallVector<const SDNode *, 16> Worklist;
   11690   for (SmallPtrSet<SDNode *, 16>::iterator I = Visited.begin(),
   11691        IE = Visited.end(); I != IE; ++I)
   11692     if (*I != OriginalChain.getNode())
   11693       Worklist.push_back(*I);
   11694 
   11695   while (!Worklist.empty()) {
   11696     const SDNode *M = Worklist.pop_back_val();
   11697 
   11698     // We have already visited M, and want to make sure we've visited any uses
   11699     // of M that we care about. For uses that we've not visited, and don't
   11700     // care about, queue them to the worklist.
   11701 
   11702     for (SDNode::use_iterator UI = M->use_begin(),
   11703          UIE = M->use_end(); UI != UIE; ++UI)
   11704       if (UI.getUse().getValueType() == MVT::Other && Visited.insert(*UI)) {
   11705         if (isa<MemIntrinsicSDNode>(*UI) || isa<MemSDNode>(*UI)) {
   11706           // We've not visited this use, and we care about it (it could have an
   11707           // ordering dependency with the original node).
   11708           Aliases.clear();
   11709           Aliases.push_back(OriginalChain);
   11710           return;
   11711         }
   11712 
   11713         // We've not visited this use, but we don't care about it. Mark it as
   11714         // visited and enqueue it to the worklist.
   11715         Worklist.push_back(*UI);
   11716       }
   11717   }
   11718 }
   11719 
   11720 /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
   11721 /// for a better chain (aliasing node.)
   11722 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
   11723   SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.
   11724 
   11725   // Accumulate all the aliases to this node.
   11726   GatherAllAliases(N, OldChain, Aliases);
   11727 
   11728   // If no operands then chain to entry token.
   11729   if (Aliases.size() == 0)
   11730     return DAG.getEntryNode();
   11731 
   11732   // If a single operand then chain to it.  We don't need to revisit it.
   11733   if (Aliases.size() == 1)
   11734     return Aliases[0];
   11735 
   11736   // Construct a custom tailored token factor.
   11737   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
   11738 }
   11739 
   11740 // SelectionDAG::Combine - This is the entry point for the file.
   11741 //
   11742 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
   11743                            CodeGenOpt::Level OptLevel) {
   11744   // Construct a DAGCombiner for this DAG and run it at the requested
   11745   // combine level.
   11746   DAGCombiner(*this, AA, OptLevel).Run(Level);
   11747 }
   11748