Home | History | Annotate | Download | only in SelectionDAG
      1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
     11 // both before and after the DAG is legalized.
     12 //
     13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
     14 // primarily intended to handle simplification opportunities that are implicit
     15 // in the LLVM IR and exposed by the various codegen lowering phases.
     16 //
     17 //===----------------------------------------------------------------------===//
     18 
     19 #include "llvm/CodeGen/SelectionDAG.h"
     20 #include "llvm/ADT/SetVector.h"
     21 #include "llvm/ADT/SmallBitVector.h"
     22 #include "llvm/ADT/SmallPtrSet.h"
     23 #include "llvm/ADT/Statistic.h"
     24 #include "llvm/Analysis/AliasAnalysis.h"
     25 #include "llvm/CodeGen/MachineFrameInfo.h"
     26 #include "llvm/CodeGen/MachineFunction.h"
     27 #include "llvm/IR/DataLayout.h"
     28 #include "llvm/IR/DerivedTypes.h"
     29 #include "llvm/IR/Function.h"
     30 #include "llvm/IR/LLVMContext.h"
     31 #include "llvm/Support/CommandLine.h"
     32 #include "llvm/Support/Debug.h"
     33 #include "llvm/Support/ErrorHandling.h"
     34 #include "llvm/Support/MathExtras.h"
     35 #include "llvm/Support/raw_ostream.h"
     36 #include "llvm/Target/TargetLowering.h"
     37 #include "llvm/Target/TargetOptions.h"
     38 #include "llvm/Target/TargetRegisterInfo.h"
     39 #include "llvm/Target/TargetSubtargetInfo.h"
     40 #include <algorithm>
     41 using namespace llvm;
     42 
     43 #define DEBUG_TYPE "dagcombine"
     44 
     45 STATISTIC(NodesCombined   , "Number of dag nodes combined");
     46 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
     47 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
     48 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
     49 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
     50 STATISTIC(SlicedLoads, "Number of load sliced");
     51 
     52 namespace {
     53   static cl::opt<bool>
     54     CombinerAA("combiner-alias-analysis", cl::Hidden,
     55                cl::desc("Enable DAG combiner alias-analysis heuristics"));
     56 
     57   static cl::opt<bool>
     58     CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
     59                cl::desc("Enable DAG combiner's use of IR alias analysis"));
     60 
     61   static cl::opt<bool>
     62     UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
     63                cl::desc("Enable DAG combiner's use of TBAA"));
     64 
     65 #ifndef NDEBUG
     66   static cl::opt<std::string>
     67     CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
     68                cl::desc("Only use DAG-combiner alias analysis in this"
     69                         " function"));
     70 #endif
     71 
     72   /// Hidden option to stress test load slicing, i.e., when this option
     73   /// is enabled, load slicing bypasses most of its profitability guards.
     74   static cl::opt<bool>
     75   StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
     76                     cl::desc("Bypass the profitability model of load "
     77                              "slicing"),
     78                     cl::init(false));
     79 
     80   static cl::opt<bool>
     81     MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
     82                       cl::desc("DAG combiner may split indexing from loads"));
     83 
     84 //------------------------------ DAGCombiner ---------------------------------//
     85 
     86   class DAGCombiner {
     87     SelectionDAG &DAG;
     88     const TargetLowering &TLI;
     89     CombineLevel Level;
     90     CodeGenOpt::Level OptLevel;
     91     bool LegalOperations;
     92     bool LegalTypes;
     93     bool ForCodeSize;
     94 
     95     /// \brief Worklist of all of the nodes that need to be simplified.
     96     ///
     97     /// This must behave as a stack -- new nodes to process are pushed onto the
     98     /// back and when processing we pop off of the back.
     99     ///
    100     /// The worklist will not contain duplicates but may contain null entries
    101     /// due to nodes being deleted from the underlying DAG.
    102     SmallVector<SDNode *, 64> Worklist;
    103 
    104     /// \brief Mapping from an SDNode to its position on the worklist.
    105     ///
    106     /// This is used to find and remove nodes from the worklist (by nulling
    107     /// them) when they are deleted from the underlying DAG. It relies on
    108     /// stable indices of nodes within the worklist.
    109     DenseMap<SDNode *, unsigned> WorklistMap;
    110 
    111     /// \brief Set of nodes which have been combined (at least once).
    112     ///
    113     /// This is used to allow us to reliably add any operands of a DAG node
    114     /// which have not yet been combined to the worklist.
    115     SmallPtrSet<SDNode *, 64> CombinedNodes;
    116 
    117     // AA - Used for DAG load/store alias analysis.
    118     AliasAnalysis &AA;
    119 
    120     /// When an instruction is simplified, add all users of the instruction to
    121     /// the work lists because they might get more simplified now.
    122     void AddUsersToWorklist(SDNode *N) {
    123       for (SDNode *Node : N->uses())
    124         AddToWorklist(Node);
    125     }
    126 
    127     /// Call the node-specific routine that folds each particular type of node.
    128     SDValue visit(SDNode *N);
    129 
    130   public:
    131     /// Add to the worklist making sure its instance is at the back (next to be
    132     /// processed.)
    133     void AddToWorklist(SDNode *N) {
    134       // Skip handle nodes as they can't usefully be combined and confuse the
    135       // zero-use deletion strategy.
    136       if (N->getOpcode() == ISD::HANDLENODE)
    137         return;
    138 
    139       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
    140         Worklist.push_back(N);
    141     }
    142 
    143     /// Remove all instances of N from the worklist.
    144     void removeFromWorklist(SDNode *N) {
    145       CombinedNodes.erase(N);
    146 
    147       auto It = WorklistMap.find(N);
    148       if (It == WorklistMap.end())
    149         return; // Not in the worklist.
    150 
    151       // Null out the entry rather than erasing it to avoid a linear operation.
    152       Worklist[It->second] = nullptr;
    153       WorklistMap.erase(It);
    154     }
    155 
    156     void deleteAndRecombine(SDNode *N);
    157     bool recursivelyDeleteUnusedNodes(SDNode *N);
    158 
    159     /// Replaces all uses of the results of one DAG node with new values.
    160     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
    161                       bool AddTo = true);
    162 
    163     /// Replaces all uses of the results of one DAG node with new values.
    164     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
    165       return CombineTo(N, &Res, 1, AddTo);
    166     }
    167 
    168     /// Replaces all uses of the results of one DAG node with new values.
    169     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
    170                       bool AddTo = true) {
    171       SDValue To[] = { Res0, Res1 };
    172       return CombineTo(N, To, 2, AddTo);
    173     }
    174 
    175     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
    176 
    177   private:
    178 
    179     /// Check the specified integer node value to see if it can be simplified or
    180     /// if things it uses can be simplified by bit propagation.
    181     /// If so, return true.
    182     bool SimplifyDemandedBits(SDValue Op) {
    183       unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
    184       APInt Demanded = APInt::getAllOnesValue(BitWidth);
    185       return SimplifyDemandedBits(Op, Demanded);
    186     }
    187 
    188     bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
    189 
    190     bool CombineToPreIndexedLoadStore(SDNode *N);
    191     bool CombineToPostIndexedLoadStore(SDNode *N);
    192     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    193     bool SliceUpLoad(SDNode *N);
    194 
    195     /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    196     ///   load.
    197     ///
    198     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    199     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    200     /// \param EltNo index of the vector element to load.
    201     /// \param OriginalLoad load that EVE came from to be replaced.
    202     /// \returns EVE on success SDValue() on failure.
    203     SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    204         SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
    205     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    206     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    207     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    208     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    209     SDValue PromoteIntBinOp(SDValue Op);
    210     SDValue PromoteIntShiftOp(SDValue Op);
    211     SDValue PromoteExtend(SDValue Op);
    212     bool PromoteLoad(SDValue Op);
    213 
    214     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
    215                          SDValue Trunc, SDValue ExtLoad, SDLoc DL,
    216                          ISD::NodeType ExtType);
    217 
    218     /// Call the node-specific routine that knows how to fold each
    219     /// particular type of node. If that doesn't do anything, try the
    220     /// target-specific DAG combines.
    221     SDValue combine(SDNode *N);
    222 
    223     // Visitation implementation - Implement dag node combining for different
    224     // node types.  The semantics are as follows:
    225     // Return Value:
    226     //   SDValue.getNode() == 0 - No change was made
    227     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    228     //   otherwise              - N should be replaced by the returned Operand.
    229     //
    230     SDValue visitTokenFactor(SDNode *N);
    231     SDValue visitMERGE_VALUES(SDNode *N);
    232     SDValue visitADD(SDNode *N);
    233     SDValue visitSUB(SDNode *N);
    234     SDValue visitADDC(SDNode *N);
    235     SDValue visitSUBC(SDNode *N);
    236     SDValue visitADDE(SDNode *N);
    237     SDValue visitSUBE(SDNode *N);
    238     SDValue visitMUL(SDNode *N);
    239     SDValue useDivRem(SDNode *N);
    240     SDValue visitSDIV(SDNode *N);
    241     SDValue visitUDIV(SDNode *N);
    242     SDValue visitREM(SDNode *N);
    243     SDValue visitMULHU(SDNode *N);
    244     SDValue visitMULHS(SDNode *N);
    245     SDValue visitSMUL_LOHI(SDNode *N);
    246     SDValue visitUMUL_LOHI(SDNode *N);
    247     SDValue visitSMULO(SDNode *N);
    248     SDValue visitUMULO(SDNode *N);
    249     SDValue visitIMINMAX(SDNode *N);
    250     SDValue visitAND(SDNode *N);
    251     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
    252     SDValue visitOR(SDNode *N);
    253     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
    254     SDValue visitXOR(SDNode *N);
    255     SDValue SimplifyVBinOp(SDNode *N);
    256     SDValue visitSHL(SDNode *N);
    257     SDValue visitSRA(SDNode *N);
    258     SDValue visitSRL(SDNode *N);
    259     SDValue visitRotate(SDNode *N);
    260     SDValue visitBSWAP(SDNode *N);
    261     SDValue visitCTLZ(SDNode *N);
    262     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    263     SDValue visitCTTZ(SDNode *N);
    264     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    265     SDValue visitCTPOP(SDNode *N);
    266     SDValue visitSELECT(SDNode *N);
    267     SDValue visitVSELECT(SDNode *N);
    268     SDValue visitSELECT_CC(SDNode *N);
    269     SDValue visitSETCC(SDNode *N);
    270     SDValue visitSETCCE(SDNode *N);
    271     SDValue visitSIGN_EXTEND(SDNode *N);
    272     SDValue visitZERO_EXTEND(SDNode *N);
    273     SDValue visitANY_EXTEND(SDNode *N);
    274     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    275     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
    276     SDValue visitTRUNCATE(SDNode *N);
    277     SDValue visitBITCAST(SDNode *N);
    278     SDValue visitBUILD_PAIR(SDNode *N);
    279     SDValue visitFADD(SDNode *N);
    280     SDValue visitFSUB(SDNode *N);
    281     SDValue visitFMUL(SDNode *N);
    282     SDValue visitFMA(SDNode *N);
    283     SDValue visitFDIV(SDNode *N);
    284     SDValue visitFREM(SDNode *N);
    285     SDValue visitFSQRT(SDNode *N);
    286     SDValue visitFCOPYSIGN(SDNode *N);
    287     SDValue visitSINT_TO_FP(SDNode *N);
    288     SDValue visitUINT_TO_FP(SDNode *N);
    289     SDValue visitFP_TO_SINT(SDNode *N);
    290     SDValue visitFP_TO_UINT(SDNode *N);
    291     SDValue visitFP_ROUND(SDNode *N);
    292     SDValue visitFP_ROUND_INREG(SDNode *N);
    293     SDValue visitFP_EXTEND(SDNode *N);
    294     SDValue visitFNEG(SDNode *N);
    295     SDValue visitFABS(SDNode *N);
    296     SDValue visitFCEIL(SDNode *N);
    297     SDValue visitFTRUNC(SDNode *N);
    298     SDValue visitFFLOOR(SDNode *N);
    299     SDValue visitFMINNUM(SDNode *N);
    300     SDValue visitFMAXNUM(SDNode *N);
    301     SDValue visitBRCOND(SDNode *N);
    302     SDValue visitBR_CC(SDNode *N);
    303     SDValue visitLOAD(SDNode *N);
    304 
    305     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    306     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
    307 
    308     SDValue visitSTORE(SDNode *N);
    309     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    310     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    311     SDValue visitBUILD_VECTOR(SDNode *N);
    312     SDValue visitCONCAT_VECTORS(SDNode *N);
    313     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    314     SDValue visitVECTOR_SHUFFLE(SDNode *N);
    315     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    316     SDValue visitINSERT_SUBVECTOR(SDNode *N);
    317     SDValue visitMLOAD(SDNode *N);
    318     SDValue visitMSTORE(SDNode *N);
    319     SDValue visitMGATHER(SDNode *N);
    320     SDValue visitMSCATTER(SDNode *N);
    321     SDValue visitFP_TO_FP16(SDNode *N);
    322     SDValue visitFP16_TO_FP(SDNode *N);
    323 
    324     SDValue visitFADDForFMACombine(SDNode *N);
    325     SDValue visitFSUBForFMACombine(SDNode *N);
    326     SDValue visitFMULForFMACombine(SDNode *N);
    327 
    328     SDValue XformToShuffleWithZero(SDNode *N);
    329     SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
    330 
    331     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
    332 
    333     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    334     SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
    335     SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2);
    336     SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2,
    337                              SDValue N3, ISD::CondCode CC,
    338                              bool NotExtCompare = false);
    339     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
    340                           SDLoc DL, bool foldBooleans = true);
    341 
    342     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
    343                            SDValue &CC) const;
    344     bool isOneUseSetCC(SDValue N) const;
    345 
    346     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
    347                                          unsigned HiOp);
    348     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    349     SDValue CombineExtLoad(SDNode *N);
    350     SDValue combineRepeatedFPDivisors(SDNode *N);
    351     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    352     SDValue BuildSDIV(SDNode *N);
    353     SDValue BuildSDIVPow2(SDNode *N);
    354     SDValue BuildUDIV(SDNode *N);
    355     SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
    356     SDValue BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
    357     SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
    358                                  SDNodeFlags *Flags);
    359     SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
    360                                  SDNodeFlags *Flags);
    361     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
    362                                bool DemandHighBits = true);
    363     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    364     SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
    365                               SDValue InnerPos, SDValue InnerNeg,
    366                               unsigned PosOpcode, unsigned NegOpcode,
    367                               SDLoc DL);
    368     SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL);
    369     SDValue ReduceLoadWidth(SDNode *N);
    370     SDValue ReduceLoadOpStoreWidth(SDNode *N);
    371     SDValue TransformFPLoadStorePair(SDNode *N);
    372     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    373     SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
    374 
    375     SDValue GetDemandedBits(SDValue V, const APInt &Mask);
    376 
    377     /// Walk up chain skipping non-aliasing memory nodes,
    378     /// looking for aliasing nodes and adding them to the Aliases vector.
    379     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
    380                           SmallVectorImpl<SDValue> &Aliases);
    381 
    382     /// Return true if there is any possibility that the two addresses overlap.
    383     bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
    384 
    385     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    386     /// chain (aliasing node.)
    387     SDValue FindBetterChain(SDNode *N, SDValue Chain);
    388 
    389     /// Do FindBetterChain for a store and any possibly adjacent stores on
    390     /// consecutive chains.
    391     bool findBetterNeighborChains(StoreSDNode *St);
    392 
    393     /// Holds a pointer to an LSBaseSDNode as well as information on where it
    394     /// is located in a sequence of memory operations connected by a chain.
    395     struct MemOpLink {
    396       MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
    397       MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
    398       // Ptr to the mem node.
    399       LSBaseSDNode *MemNode;
    400       // Offset from the base ptr.
    401       int64_t OffsetFromBase;
    402       // What is the sequence number of this mem node.
    403       // Lowest mem operand in the DAG starts at zero.
    404       unsigned SequenceNum;
    405     };
    406 
    407     /// This is a helper function for visitMUL to check the profitability
    408     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    409     /// MulNode is the original multiply, AddNode is (add x, c1),
    410     /// and ConstNode is c2.
    411     bool isMulAddWithConstProfitable(SDNode *MulNode,
    412                                      SDValue &AddNode,
    413                                      SDValue &ConstNode);
    414 
    415     /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
    416     /// constant build_vector of the stored constant values in Stores.
    417     SDValue getMergedConstantVectorStore(SelectionDAG &DAG,
    418                                          SDLoc SL,
    419                                          ArrayRef<MemOpLink> Stores,
    420                                          SmallVectorImpl<SDValue> &Chains,
    421                                          EVT Ty) const;
    422 
    423     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    424     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    425     /// the type of the loaded value to be extended.  LoadedVT returns the type
    426     /// of the original loaded value.  NarrowLoad returns whether the load would
    427     /// need to be narrowed in order to match.
    428     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
    429                           EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
    430                           bool &NarrowLoad);
    431 
    432     /// This is a helper function for MergeConsecutiveStores. When the source
    433     /// elements of the consecutive stores are all constants or all extracted
    434     /// vector elements, try to merge them into one larger store.
    435     /// \return True if a merged store was created.
    436     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
    437                                          EVT MemVT, unsigned NumStores,
    438                                          bool IsConstantSrc, bool UseVector);
    439 
    440     /// This is a helper function for MergeConsecutiveStores.
    441     /// Stores that may be merged are placed in StoreNodes.
    442     /// Loads that may alias with those stores are placed in AliasLoadNodes.
    443     void getStoreMergeAndAliasCandidates(
    444         StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
    445         SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
    446 
    447     /// Merge consecutive store operations into a wide store.
    448     /// This optimization uses wide integers or vectors when possible.
    449     /// \return True if some memory operations were changed.
    450     bool MergeConsecutiveStores(StoreSDNode *N);
    451 
    452     /// \brief Try to transform a truncation where C is a constant:
    453     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    454     ///
    455     /// \p N needs to be a truncation and its first operand an AND. Other
    456     /// requirements are checked by the function (e.g. that trunc is
    457     /// single-use) and if missed an empty SDValue is returned.
    458     SDValue distributeTruncateThroughAnd(SDNode *N);
    459 
    460   public:
    461     DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
    462         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
    463           OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
    464       ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
    465     }
    466 
    467     /// Runs the dag combiner on all nodes in the work list
    468     void Run(CombineLevel AtLevel);
    469 
    470     SelectionDAG &getDAG() const { return DAG; }
    471 
    472     /// Returns a type large enough to hold any valid shift amount - before type
    473     /// legalization these can be huge.
    474     EVT getShiftAmountTy(EVT LHSTy) {
    475       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
    476       if (LHSTy.isVector())
    477         return LHSTy;
    478       auto &DL = DAG.getDataLayout();
    479       return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
    480                         : TLI.getPointerTy(DL);
    481     }
    482 
    483     /// This method returns true if we are running before type legalization or
    484     /// if the specified VT is legal.
    485     bool isTypeLegal(const EVT &VT) {
    486       if (!LegalTypes) return true;
    487       return TLI.isTypeLegal(VT);
    488     }
    489 
    490     /// Convenience wrapper around TargetLowering::getSetCCResultType
    491     EVT getSetCCResultType(EVT VT) const {
    492       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    493     }
    494   };
    495 }
    496 
    497 
    498 namespace {
    499 /// This class is a DAGUpdateListener that removes any deleted
    500 /// nodes from the worklist.
    501 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
    502   DAGCombiner &DC;
    503 public:
    504   explicit WorklistRemover(DAGCombiner &dc)
    505     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
    506 
    507   void NodeDeleted(SDNode *N, SDNode *E) override {
    508     DC.removeFromWorklist(N);
    509   }
    510 };
    511 }
    512 
    513 //===----------------------------------------------------------------------===//
    514 //  TargetLowering::DAGCombinerInfo implementation
    515 //===----------------------------------------------------------------------===//
    516 
    517 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
    518   ((DAGCombiner*)DC)->AddToWorklist(N);
    519 }
    520 
    521 void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
    522   ((DAGCombiner*)DC)->removeFromWorklist(N);
    523 }
    524 
    525 SDValue TargetLowering::DAGCombinerInfo::
    526 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
    527   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
    528 }
    529 
    530 SDValue TargetLowering::DAGCombinerInfo::
    531 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
    532   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
    533 }
    534 
    535 
    536 SDValue TargetLowering::DAGCombinerInfo::
    537 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
    538   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
    539 }
    540 
    541 void TargetLowering::DAGCombinerInfo::
    542 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
    543   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
    544 }
    545 
    546 //===----------------------------------------------------------------------===//
    547 // Helper Functions
    548 //===----------------------------------------------------------------------===//
    549 
    550 void DAGCombiner::deleteAndRecombine(SDNode *N) {
    551   removeFromWorklist(N);
    552 
    553   // If the operands of this node are only used by the node, they will now be
    554   // dead. Make sure to re-visit them and recursively delete dead nodes.
    555   for (const SDValue &Op : N->ops())
    556     // For an operand generating multiple values, one of the values may
    557     // become dead allowing further simplification (e.g. split index
    558     // arithmetic from an indexed load).
    559     if (Op->hasOneUse() || Op->getNumValues() > 1)
    560       AddToWorklist(Op.getNode());
    561 
    562   DAG.DeleteNode(N);
    563 }
    564 
    565 /// Return 1 if we can compute the negated form of the specified expression for
    566 /// the same cost as the expression itself, or 2 if we can compute the negated
    567 /// form more cheaply than the expression itself.
    568 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
    569                                const TargetLowering &TLI,
    570                                const TargetOptions *Options,
    571                                unsigned Depth = 0) {
    572   // fneg is removable even if it has multiple uses.
    573   if (Op.getOpcode() == ISD::FNEG) return 2;
    574 
    575   // Don't allow anything with multiple uses.
    576   if (!Op.hasOneUse()) return 0;
    577 
    578   // Don't recurse exponentially.
    579   if (Depth > 6) return 0;
    580 
    581   switch (Op.getOpcode()) {
    582   default: return false;
    583   case ISD::ConstantFP:
    584     // Don't invert constant FP values after legalize.  The negated constant
    585     // isn't necessarily legal.
    586     return LegalOperations ? 0 : 1;
    587   case ISD::FADD:
    588     // FIXME: determine better conditions for this xform.
    589     if (!Options->UnsafeFPMath) return 0;
    590 
    591     // After operation legalization, it might not be legal to create new FSUBs.
    592     if (LegalOperations &&
    593         !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
    594       return 0;
    595 
    596     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    597     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
    598                                     Options, Depth + 1))
    599       return V;
    600     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    601     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
    602                               Depth + 1);
    603   case ISD::FSUB:
    604     // We can't turn -(A-B) into B-A when we honor signed zeros.
    605     if (!Options->UnsafeFPMath) return 0;
    606 
    607     // fold (fneg (fsub A, B)) -> (fsub B, A)
    608     return 1;
    609 
    610   case ISD::FMUL:
    611   case ISD::FDIV:
    612     if (Options->HonorSignDependentRoundingFPMath()) return 0;
    613 
    614     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
    615     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
    616                                     Options, Depth + 1))
    617       return V;
    618 
    619     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
    620                               Depth + 1);
    621 
    622   case ISD::FP_EXTEND:
    623   case ISD::FP_ROUND:
    624   case ISD::FSIN:
    625     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
    626                               Depth + 1);
    627   }
    628 }
    629 
    630 /// If isNegatibleForFree returns true, return the newly negated expression.
    631 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
    632                                     bool LegalOperations, unsigned Depth = 0) {
    633   const TargetOptions &Options = DAG.getTarget().Options;
    634   // fneg is removable even if it has multiple uses.
    635   if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
    636 
    637   // Don't allow anything with multiple uses.
    638   assert(Op.hasOneUse() && "Unknown reuse!");
    639 
    640   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
    641 
    642   const SDNodeFlags *Flags = Op.getNode()->getFlags();
    643 
    644   switch (Op.getOpcode()) {
    645   default: llvm_unreachable("Unknown code");
    646   case ISD::ConstantFP: {
    647     APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    648     V.changeSign();
    649     return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
    650   }
    651   case ISD::FADD:
    652     // FIXME: determine better conditions for this xform.
    653     assert(Options.UnsafeFPMath);
    654 
    655     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    656     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
    657                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
    658       return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
    659                          GetNegatedExpression(Op.getOperand(0), DAG,
    660                                               LegalOperations, Depth+1),
    661                          Op.getOperand(1), Flags);
    662     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    663     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
    664                        GetNegatedExpression(Op.getOperand(1), DAG,
    665                                             LegalOperations, Depth+1),
    666                        Op.getOperand(0), Flags);
    667   case ISD::FSUB:
    668     // We can't turn -(A-B) into B-A when we honor signed zeros.
    669     assert(Options.UnsafeFPMath);
    670 
    671     // fold (fneg (fsub 0, B)) -> B
    672     if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
    673       if (N0CFP->isZero())
    674         return Op.getOperand(1);
    675 
    676     // fold (fneg (fsub A, B)) -> (fsub B, A)
    677     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
    678                        Op.getOperand(1), Op.getOperand(0), Flags);
    679 
    680   case ISD::FMUL:
    681   case ISD::FDIV:
    682     assert(!Options.HonorSignDependentRoundingFPMath());
    683 
    684     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    685     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
    686                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
    687       return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
    688                          GetNegatedExpression(Op.getOperand(0), DAG,
    689                                               LegalOperations, Depth+1),
    690                          Op.getOperand(1), Flags);
    691 
    692     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    693     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
    694                        Op.getOperand(0),
    695                        GetNegatedExpression(Op.getOperand(1), DAG,
    696                                             LegalOperations, Depth+1), Flags);
    697 
    698   case ISD::FP_EXTEND:
    699   case ISD::FSIN:
    700     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
    701                        GetNegatedExpression(Op.getOperand(0), DAG,
    702                                             LegalOperations, Depth+1));
    703   case ISD::FP_ROUND:
    704       return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
    705                          GetNegatedExpression(Op.getOperand(0), DAG,
    706                                               LegalOperations, Depth+1),
    707                          Op.getOperand(1));
    708   }
    709 }
    710 
    711 // Return true if this node is a setcc, or is a select_cc
    712 // that selects between the target values used for true and false, making it
    713 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
    714 // the appropriate nodes based on the type of node we are checking. This
    715 // simplifies life a bit for the callers.
    716 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
    717                                     SDValue &CC) const {
    718   if (N.getOpcode() == ISD::SETCC) {
    719     LHS = N.getOperand(0);
    720     RHS = N.getOperand(1);
    721     CC  = N.getOperand(2);
    722     return true;
    723   }
    724 
    725   if (N.getOpcode() != ISD::SELECT_CC ||
    726       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
    727       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
    728     return false;
    729 
    730   if (TLI.getBooleanContents(N.getValueType()) ==
    731       TargetLowering::UndefinedBooleanContent)
    732     return false;
    733 
    734   LHS = N.getOperand(0);
    735   RHS = N.getOperand(1);
    736   CC  = N.getOperand(4);
    737   return true;
    738 }
    739 
    740 /// Return true if this is a SetCC-equivalent operation with only one use.
    741 /// If this is true, it allows the users to invert the operation for free when
    742 /// it is profitable to do so.
    743 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
    744   SDValue N0, N1, N2;
    745   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
    746     return true;
    747   return false;
    748 }
    749 
    750 /// Returns true if N is a BUILD_VECTOR node whose
    751 /// elements are all the same constant or undefined.
    752 static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
    753   BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
    754   if (!C)
    755     return false;
    756 
    757   APInt SplatUndef;
    758   unsigned SplatBitSize;
    759   bool HasAnyUndefs;
    760   EVT EltVT = N->getValueType(0).getVectorElementType();
    761   return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
    762                              HasAnyUndefs) &&
    763           EltVT.getSizeInBits() >= SplatBitSize);
    764 }
    765 
    766 // \brief Returns the SDNode if it is a constant integer BuildVector
    767 // or constant integer.
    768 static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) {
    769   if (isa<ConstantSDNode>(N))
    770     return N.getNode();
    771   if (ISD::isBuildVectorOfConstantSDNodes(N.getNode()))
    772     return N.getNode();
    773   return nullptr;
    774 }
    775 
    776 // \brief Returns the SDNode if it is a constant float BuildVector
    777 // or constant float.
    778 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
    779   if (isa<ConstantFPSDNode>(N))
    780     return N.getNode();
    781   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
    782     return N.getNode();
    783   return nullptr;
    784 }
    785 
    786 // \brief Returns the SDNode if it is a constant splat BuildVector or constant
    787 // int.
    788 static ConstantSDNode *isConstOrConstSplat(SDValue N) {
    789   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
    790     return CN;
    791 
    792   if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
    793     BitVector UndefElements;
    794     ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
    795 
    796     // BuildVectors can truncate their operands. Ignore that case here.
    797     // FIXME: We blindly ignore splats which include undef which is overly
    798     // pessimistic.
    799     if (CN && UndefElements.none() &&
    800         CN->getValueType(0) == N.getValueType().getScalarType())
    801       return CN;
    802   }
    803 
    804   return nullptr;
    805 }
    806 
    807 // \brief Returns the SDNode if it is a constant splat BuildVector or constant
    808 // float.
    809 static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
    810   if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
    811     return CN;
    812 
    813   if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
    814     BitVector UndefElements;
    815     ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
    816 
    817     if (CN && UndefElements.none())
    818       return CN;
    819   }
    820 
    821   return nullptr;
    822 }
    823 
    824 SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
    825                                     SDValue N0, SDValue N1) {
    826   EVT VT = N0.getValueType();
    827   if (N0.getOpcode() == Opc) {
    828     if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
    829       if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) {
    830         // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
    831         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
    832           return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
    833         return SDValue();
    834       }
    835       if (N0.hasOneUse()) {
    836         // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
    837         // use
    838         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
    839         if (!OpNode.getNode())
    840           return SDValue();
    841         AddToWorklist(OpNode.getNode());
    842         return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
    843       }
    844     }
    845   }
    846 
    847   if (N1.getOpcode() == Opc) {
    848     if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
    849       if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) {
    850         // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
    851         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
    852           return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
    853         return SDValue();
    854       }
    855       if (N1.hasOneUse()) {
    856         // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one
    857         // use
    858         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0);
    859         if (!OpNode.getNode())
    860           return SDValue();
    861         AddToWorklist(OpNode.getNode());
    862         return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
    863       }
    864     }
    865   }
    866 
    867   return SDValue();
    868 }
    869 
    870 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
    871                                bool AddTo) {
    872   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
    873   ++NodesCombined;
    874   DEBUG(dbgs() << "\nReplacing.1 ";
    875         N->dump(&DAG);
    876         dbgs() << "\nWith: ";
    877         To[0].getNode()->dump(&DAG);
    878         dbgs() << " and " << NumTo-1 << " other values\n");
    879   for (unsigned i = 0, e = NumTo; i != e; ++i)
    880     assert((!To[i].getNode() ||
    881             N->getValueType(i) == To[i].getValueType()) &&
    882            "Cannot combine value to value of different type!");
    883 
    884   WorklistRemover DeadNodes(*this);
    885   DAG.ReplaceAllUsesWith(N, To);
    886   if (AddTo) {
    887     // Push the new nodes and any users onto the worklist
    888     for (unsigned i = 0, e = NumTo; i != e; ++i) {
    889       if (To[i].getNode()) {
    890         AddToWorklist(To[i].getNode());
    891         AddUsersToWorklist(To[i].getNode());
    892       }
    893     }
    894   }
    895 
    896   // Finally, if the node is now dead, remove it from the graph.  The node
    897   // may not be dead if the replacement process recursively simplified to
    898   // something else needing this node.
    899   if (N->use_empty())
    900     deleteAndRecombine(N);
    901   return SDValue(N, 0);
    902 }
    903 
    904 void DAGCombiner::
    905 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
    906   // Replace all uses.  If any nodes become isomorphic to other nodes and
    907   // are deleted, make sure to remove them from our worklist.
    908   WorklistRemover DeadNodes(*this);
    909   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
    910 
    911   // Push the new node and any (possibly new) users onto the worklist.
    912   AddToWorklist(TLO.New.getNode());
    913   AddUsersToWorklist(TLO.New.getNode());
    914 
    915   // Finally, if the node is now dead, remove it from the graph.  The node
    916   // may not be dead if the replacement process recursively simplified to
    917   // something else needing this node.
    918   if (TLO.Old.getNode()->use_empty())
    919     deleteAndRecombine(TLO.Old.getNode());
    920 }
    921 
    922 /// Check the specified integer node value to see if it can be simplified or if
    923 /// things it uses can be simplified by bit propagation. If so, return true.
    924 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
    925   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
    926   APInt KnownZero, KnownOne;
    927   if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
    928     return false;
    929 
    930   // Revisit the node.
    931   AddToWorklist(Op.getNode());
    932 
    933   // Replace the old value with the new one.
    934   ++NodesCombined;
    935   DEBUG(dbgs() << "\nReplacing.2 ";
    936         TLO.Old.getNode()->dump(&DAG);
    937         dbgs() << "\nWith: ";
    938         TLO.New.getNode()->dump(&DAG);
    939         dbgs() << '\n');
    940 
    941   CommitTargetLoweringOpt(TLO);
    942   return true;
    943 }
    944 
    945 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
    946   SDLoc dl(Load);
    947   EVT VT = Load->getValueType(0);
    948   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));
    949 
    950   DEBUG(dbgs() << "\nReplacing.9 ";
    951         Load->dump(&DAG);
    952         dbgs() << "\nWith: ";
    953         Trunc.getNode()->dump(&DAG);
    954         dbgs() << '\n');
    955   WorklistRemover DeadNodes(*this);
    956   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
    957   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
    958   deleteAndRecombine(Load);
    959   AddToWorklist(Trunc.getNode());
    960 }
    961 
    962 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
    963   Replace = false;
    964   SDLoc dl(Op);
    965   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
    966     EVT MemVT = LD->getMemoryVT();
    967     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
    968       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
    969                                                        : ISD::EXTLOAD)
    970       : LD->getExtensionType();
    971     Replace = true;
    972     return DAG.getExtLoad(ExtType, dl, PVT,
    973                           LD->getChain(), LD->getBasePtr(),
    974                           MemVT, LD->getMemOperand());
    975   }
    976 
    977   unsigned Opc = Op.getOpcode();
    978   switch (Opc) {
    979   default: break;
    980   case ISD::AssertSext:
    981     return DAG.getNode(ISD::AssertSext, dl, PVT,
    982                        SExtPromoteOperand(Op.getOperand(0), PVT),
    983                        Op.getOperand(1));
    984   case ISD::AssertZext:
    985     return DAG.getNode(ISD::AssertZext, dl, PVT,
    986                        ZExtPromoteOperand(Op.getOperand(0), PVT),
    987                        Op.getOperand(1));
    988   case ISD::Constant: {
    989     unsigned ExtOpc =
    990       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    991     return DAG.getNode(ExtOpc, dl, PVT, Op);
    992   }
    993   }
    994 
    995   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    996     return SDValue();
    997   return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
    998 }
    999 
   1000 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
   1001   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
   1002     return SDValue();
   1003   EVT OldVT = Op.getValueType();
   1004   SDLoc dl(Op);
   1005   bool Replace = false;
   1006   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
   1007   if (!NewOp.getNode())
   1008     return SDValue();
   1009   AddToWorklist(NewOp.getNode());
   1010 
   1011   if (Replace)
   1012     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
   1013   return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
   1014                      DAG.getValueType(OldVT));
   1015 }
   1016 
   1017 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
   1018   EVT OldVT = Op.getValueType();
   1019   SDLoc dl(Op);
   1020   bool Replace = false;
   1021   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
   1022   if (!NewOp.getNode())
   1023     return SDValue();
   1024   AddToWorklist(NewOp.getNode());
   1025 
   1026   if (Replace)
   1027     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
   1028   return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
   1029 }
   1030 
   1031 /// Promote the specified integer binary operation if the target indicates it is
   1032 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
   1033 /// i32 since i16 instructions are longer.
   1034 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
   1035   if (!LegalOperations)
   1036     return SDValue();
   1037 
   1038   EVT VT = Op.getValueType();
   1039   if (VT.isVector() || !VT.isInteger())
   1040     return SDValue();
   1041 
   1042   // If operation type is 'undesirable', e.g. i16 on x86, consider
   1043   // promoting it.
   1044   unsigned Opc = Op.getOpcode();
   1045   if (TLI.isTypeDesirableForOp(Opc, VT))
   1046     return SDValue();
   1047 
   1048   EVT PVT = VT;
   1049   // Consult target whether it is a good idea to promote this operation and
   1050   // what's the right type to promote it to.
   1051   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
   1052     assert(PVT != VT && "Don't know what type to promote to!");
   1053 
   1054     bool Replace0 = false;
   1055     SDValue N0 = Op.getOperand(0);
   1056     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
   1057     if (!NN0.getNode())
   1058       return SDValue();
   1059 
   1060     bool Replace1 = false;
   1061     SDValue N1 = Op.getOperand(1);
   1062     SDValue NN1;
   1063     if (N0 == N1)
   1064       NN1 = NN0;
   1065     else {
   1066       NN1 = PromoteOperand(N1, PVT, Replace1);
   1067       if (!NN1.getNode())
   1068         return SDValue();
   1069     }
   1070 
   1071     AddToWorklist(NN0.getNode());
   1072     if (NN1.getNode())
   1073       AddToWorklist(NN1.getNode());
   1074 
   1075     if (Replace0)
   1076       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
   1077     if (Replace1)
   1078       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
   1079 
   1080     DEBUG(dbgs() << "\nPromoting ";
   1081           Op.getNode()->dump(&DAG));
   1082     SDLoc dl(Op);
   1083     return DAG.getNode(ISD::TRUNCATE, dl, VT,
   1084                        DAG.getNode(Opc, dl, PVT, NN0, NN1));
   1085   }
   1086   return SDValue();
   1087 }
   1088 
   1089 /// Promote the specified integer shift operation if the target indicates it is
   1090 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
   1091 /// i32 since i16 instructions are longer.
   1092 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
   1093   if (!LegalOperations)
   1094     return SDValue();
   1095 
   1096   EVT VT = Op.getValueType();
   1097   if (VT.isVector() || !VT.isInteger())
   1098     return SDValue();
   1099 
   1100   // If operation type is 'undesirable', e.g. i16 on x86, consider
   1101   // promoting it.
   1102   unsigned Opc = Op.getOpcode();
   1103   if (TLI.isTypeDesirableForOp(Opc, VT))
   1104     return SDValue();
   1105 
   1106   EVT PVT = VT;
   1107   // Consult target whether it is a good idea to promote this operation and
   1108   // what's the right type to promote it to.
   1109   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
   1110     assert(PVT != VT && "Don't know what type to promote to!");
   1111 
   1112     bool Replace = false;
   1113     SDValue N0 = Op.getOperand(0);
   1114     if (Opc == ISD::SRA)
   1115       N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
   1116     else if (Opc == ISD::SRL)
   1117       N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
   1118     else
   1119       N0 = PromoteOperand(N0, PVT, Replace);
   1120     if (!N0.getNode())
   1121       return SDValue();
   1122 
   1123     AddToWorklist(N0.getNode());
   1124     if (Replace)
   1125       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
   1126 
   1127     DEBUG(dbgs() << "\nPromoting ";
   1128           Op.getNode()->dump(&DAG));
   1129     SDLoc dl(Op);
   1130     return DAG.getNode(ISD::TRUNCATE, dl, VT,
   1131                        DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
   1132   }
   1133   return SDValue();
   1134 }
   1135 
   1136 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
   1137   if (!LegalOperations)
   1138     return SDValue();
   1139 
   1140   EVT VT = Op.getValueType();
   1141   if (VT.isVector() || !VT.isInteger())
   1142     return SDValue();
   1143 
   1144   // If operation type is 'undesirable', e.g. i16 on x86, consider
   1145   // promoting it.
   1146   unsigned Opc = Op.getOpcode();
   1147   if (TLI.isTypeDesirableForOp(Opc, VT))
   1148     return SDValue();
   1149 
   1150   EVT PVT = VT;
   1151   // Consult target whether it is a good idea to promote this operation and
   1152   // what's the right type to promote it to.
   1153   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
   1154     assert(PVT != VT && "Don't know what type to promote to!");
   1155     // fold (aext (aext x)) -> (aext x)
   1156     // fold (aext (zext x)) -> (zext x)
   1157     // fold (aext (sext x)) -> (sext x)
   1158     DEBUG(dbgs() << "\nPromoting ";
   1159           Op.getNode()->dump(&DAG));
   1160     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
   1161   }
   1162   return SDValue();
   1163 }
   1164 
   1165 bool DAGCombiner::PromoteLoad(SDValue Op) {
   1166   if (!LegalOperations)
   1167     return false;
   1168 
   1169   EVT VT = Op.getValueType();
   1170   if (VT.isVector() || !VT.isInteger())
   1171     return false;
   1172 
   1173   // If operation type is 'undesirable', e.g. i16 on x86, consider
   1174   // promoting it.
   1175   unsigned Opc = Op.getOpcode();
   1176   if (TLI.isTypeDesirableForOp(Opc, VT))
   1177     return false;
   1178 
   1179   EVT PVT = VT;
   1180   // Consult target whether it is a good idea to promote this operation and
   1181   // what's the right type to promote it to.
   1182   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
   1183     assert(PVT != VT && "Don't know what type to promote to!");
   1184 
   1185     SDLoc dl(Op);
   1186     SDNode *N = Op.getNode();
   1187     LoadSDNode *LD = cast<LoadSDNode>(N);
   1188     EVT MemVT = LD->getMemoryVT();
   1189     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
   1190       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
   1191                                                        : ISD::EXTLOAD)
   1192       : LD->getExtensionType();
   1193     SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
   1194                                    LD->getChain(), LD->getBasePtr(),
   1195                                    MemVT, LD->getMemOperand());
   1196     SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
   1197 
   1198     DEBUG(dbgs() << "\nPromoting ";
   1199           N->dump(&DAG);
   1200           dbgs() << "\nTo: ";
   1201           Result.getNode()->dump(&DAG);
   1202           dbgs() << '\n');
   1203     WorklistRemover DeadNodes(*this);
   1204     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
   1205     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
   1206     deleteAndRecombine(N);
   1207     AddToWorklist(Result.getNode());
   1208     return true;
   1209   }
   1210   return false;
   1211 }
   1212 
   1213 /// \brief Recursively delete a node which has no uses and any operands for
   1214 /// which it is the only use.
   1215 ///
   1216 /// Note that this both deletes the nodes and removes them from the worklist.
   1217 /// It also adds any nodes who have had a user deleted to the worklist as they
   1218 /// may now have only one use and subject to other combines.
   1219 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
   1220   if (!N->use_empty())
   1221     return false;
   1222 
   1223   SmallSetVector<SDNode *, 16> Nodes;
   1224   Nodes.insert(N);
   1225   do {
   1226     N = Nodes.pop_back_val();
   1227     if (!N)
   1228       continue;
   1229 
   1230     if (N->use_empty()) {
   1231       for (const SDValue &ChildN : N->op_values())
   1232         Nodes.insert(ChildN.getNode());
   1233 
   1234       removeFromWorklist(N);
   1235       DAG.DeleteNode(N);
   1236     } else {
   1237       AddToWorklist(N);
   1238     }
   1239   } while (!Nodes.empty());
   1240   return true;
   1241 }
   1242 
   1243 //===----------------------------------------------------------------------===//
   1244 //  Main DAG Combiner implementation
   1245 //===----------------------------------------------------------------------===//
   1246 
   1247 void DAGCombiner::Run(CombineLevel AtLevel) {
   1248   // set the instance variables, so that the various visit routines may use it.
   1249   Level = AtLevel;
   1250   LegalOperations = Level >= AfterLegalizeVectorOps;
   1251   LegalTypes = Level >= AfterLegalizeTypes;
   1252 
   1253   // Add all the dag nodes to the worklist.
   1254   for (SDNode &Node : DAG.allnodes())
   1255     AddToWorklist(&Node);
   1256 
   1257   // Create a dummy node (which is not added to allnodes), that adds a reference
   1258   // to the root node, preventing it from being deleted, and tracking any
   1259   // changes of the root.
   1260   HandleSDNode Dummy(DAG.getRoot());
   1261 
   1262   // while the worklist isn't empty, find a node and
   1263   // try and combine it.
   1264   while (!WorklistMap.empty()) {
   1265     SDNode *N;
   1266     // The Worklist holds the SDNodes in order, but it may contain null entries.
   1267     do {
   1268       N = Worklist.pop_back_val();
   1269     } while (!N);
   1270 
   1271     bool GoodWorklistEntry = WorklistMap.erase(N);
   1272     (void)GoodWorklistEntry;
   1273     assert(GoodWorklistEntry &&
   1274            "Found a worklist entry without a corresponding map entry!");
   1275 
   1276     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
   1277     // N is deleted from the DAG, since they too may now be dead or may have a
   1278     // reduced number of uses, allowing other xforms.
   1279     if (recursivelyDeleteUnusedNodes(N))
   1280       continue;
   1281 
   1282     WorklistRemover DeadNodes(*this);
   1283 
   1284     // If this combine is running after legalizing the DAG, re-legalize any
   1285     // nodes pulled off the worklist.
   1286     if (Level == AfterLegalizeDAG) {
   1287       SmallSetVector<SDNode *, 16> UpdatedNodes;
   1288       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
   1289 
   1290       for (SDNode *LN : UpdatedNodes) {
   1291         AddToWorklist(LN);
   1292         AddUsersToWorklist(LN);
   1293       }
   1294       if (!NIsValid)
   1295         continue;
   1296     }
   1297 
   1298     DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
   1299 
   1300     // Add any operands of the new node which have not yet been combined to the
   1301     // worklist as well. Because the worklist uniques things already, this
   1302     // won't repeatedly process the same operand.
   1303     CombinedNodes.insert(N);
   1304     for (const SDValue &ChildN : N->op_values())
   1305       if (!CombinedNodes.count(ChildN.getNode()))
   1306         AddToWorklist(ChildN.getNode());
   1307 
   1308     SDValue RV = combine(N);
   1309 
   1310     if (!RV.getNode())
   1311       continue;
   1312 
   1313     ++NodesCombined;
   1314 
   1315     // If we get back the same node we passed in, rather than a new node or
   1316     // zero, we know that the node must have defined multiple values and
   1317     // CombineTo was used.  Since CombineTo takes care of the worklist
   1318     // mechanics for us, we have no work to do in this case.
   1319     if (RV.getNode() == N)
   1320       continue;
   1321 
   1322     assert(N->getOpcode() != ISD::DELETED_NODE &&
   1323            RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
   1324            "Node was deleted but visit returned new node!");
   1325 
   1326     DEBUG(dbgs() << " ... into: ";
   1327           RV.getNode()->dump(&DAG));
   1328 
   1329     // Transfer debug value.
   1330     DAG.TransferDbgValues(SDValue(N, 0), RV);
   1331     if (N->getNumValues() == RV.getNode()->getNumValues())
   1332       DAG.ReplaceAllUsesWith(N, RV.getNode());
   1333     else {
   1334       assert(N->getValueType(0) == RV.getValueType() &&
   1335              N->getNumValues() == 1 && "Type mismatch");
   1336       SDValue OpV = RV;
   1337       DAG.ReplaceAllUsesWith(N, &OpV);
   1338     }
   1339 
   1340     // Push the new node and any users onto the worklist
   1341     AddToWorklist(RV.getNode());
   1342     AddUsersToWorklist(RV.getNode());
   1343 
   1344     // Finally, if the node is now dead, remove it from the graph.  The node
   1345     // may not be dead if the replacement process recursively simplified to
   1346     // something else needing this node. This will also take care of adding any
   1347     // operands which have lost a user to the worklist.
   1348     recursivelyDeleteUnusedNodes(N);
   1349   }
   1350 
   1351   // If the root changed (e.g. it was a dead load, update the root).
   1352   DAG.setRoot(Dummy.getValue());
   1353   DAG.RemoveDeadNodes();
   1354 }
   1355 
   1356 SDValue DAGCombiner::visit(SDNode *N) {
   1357   switch (N->getOpcode()) {
   1358   default: break;
   1359   case ISD::TokenFactor:        return visitTokenFactor(N);
   1360   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
   1361   case ISD::ADD:                return visitADD(N);
   1362   case ISD::SUB:                return visitSUB(N);
   1363   case ISD::ADDC:               return visitADDC(N);
   1364   case ISD::SUBC:               return visitSUBC(N);
   1365   case ISD::ADDE:               return visitADDE(N);
   1366   case ISD::SUBE:               return visitSUBE(N);
   1367   case ISD::MUL:                return visitMUL(N);
   1368   case ISD::SDIV:               return visitSDIV(N);
   1369   case ISD::UDIV:               return visitUDIV(N);
   1370   case ISD::SREM:
   1371   case ISD::UREM:               return visitREM(N);
   1372   case ISD::MULHU:              return visitMULHU(N);
   1373   case ISD::MULHS:              return visitMULHS(N);
   1374   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
   1375   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
   1376   case ISD::SMULO:              return visitSMULO(N);
   1377   case ISD::UMULO:              return visitUMULO(N);
   1378   case ISD::SMIN:
   1379   case ISD::SMAX:
   1380   case ISD::UMIN:
   1381   case ISD::UMAX:               return visitIMINMAX(N);
   1382   case ISD::AND:                return visitAND(N);
   1383   case ISD::OR:                 return visitOR(N);
   1384   case ISD::XOR:                return visitXOR(N);
   1385   case ISD::SHL:                return visitSHL(N);
   1386   case ISD::SRA:                return visitSRA(N);
   1387   case ISD::SRL:                return visitSRL(N);
   1388   case ISD::ROTR:
   1389   case ISD::ROTL:               return visitRotate(N);
   1390   case ISD::BSWAP:              return visitBSWAP(N);
   1391   case ISD::CTLZ:               return visitCTLZ(N);
   1392   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
   1393   case ISD::CTTZ:               return visitCTTZ(N);
   1394   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
   1395   case ISD::CTPOP:              return visitCTPOP(N);
   1396   case ISD::SELECT:             return visitSELECT(N);
   1397   case ISD::VSELECT:            return visitVSELECT(N);
   1398   case ISD::SELECT_CC:          return visitSELECT_CC(N);
   1399   case ISD::SETCC:              return visitSETCC(N);
   1400   case ISD::SETCCE:             return visitSETCCE(N);
   1401   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
   1402   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
   1403   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
   1404   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
   1405   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
   1406   case ISD::TRUNCATE:           return visitTRUNCATE(N);
   1407   case ISD::BITCAST:            return visitBITCAST(N);
   1408   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
   1409   case ISD::FADD:               return visitFADD(N);
   1410   case ISD::FSUB:               return visitFSUB(N);
   1411   case ISD::FMUL:               return visitFMUL(N);
   1412   case ISD::FMA:                return visitFMA(N);
   1413   case ISD::FDIV:               return visitFDIV(N);
   1414   case ISD::FREM:               return visitFREM(N);
   1415   case ISD::FSQRT:              return visitFSQRT(N);
   1416   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
   1417   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
   1418   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
   1419   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
   1420   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
   1421   case ISD::FP_ROUND:           return visitFP_ROUND(N);
   1422   case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
   1423   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
   1424   case ISD::FNEG:               return visitFNEG(N);
   1425   case ISD::FABS:               return visitFABS(N);
   1426   case ISD::FFLOOR:             return visitFFLOOR(N);
   1427   case ISD::FMINNUM:            return visitFMINNUM(N);
   1428   case ISD::FMAXNUM:            return visitFMAXNUM(N);
   1429   case ISD::FCEIL:              return visitFCEIL(N);
   1430   case ISD::FTRUNC:             return visitFTRUNC(N);
   1431   case ISD::BRCOND:             return visitBRCOND(N);
   1432   case ISD::BR_CC:              return visitBR_CC(N);
   1433   case ISD::LOAD:               return visitLOAD(N);
   1434   case ISD::STORE:              return visitSTORE(N);
   1435   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
   1436   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
   1437   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
   1438   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
   1439   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
   1440   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
   1441   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
   1442   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
   1443   case ISD::MGATHER:            return visitMGATHER(N);
   1444   case ISD::MLOAD:              return visitMLOAD(N);
   1445   case ISD::MSCATTER:           return visitMSCATTER(N);
   1446   case ISD::MSTORE:             return visitMSTORE(N);
   1447   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
   1448   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
   1449   }
   1450   return SDValue();
   1451 }
   1452 
   1453 SDValue DAGCombiner::combine(SDNode *N) {
   1454   SDValue RV = visit(N);
   1455 
   1456   // If nothing happened, try a target-specific DAG combine.
   1457   if (!RV.getNode()) {
   1458     assert(N->getOpcode() != ISD::DELETED_NODE &&
   1459            "Node was deleted but visit returned NULL!");
   1460 
   1461     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
   1462         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
   1463 
   1464       // Expose the DAG combiner to the target combiner impls.
   1465       TargetLowering::DAGCombinerInfo
   1466         DagCombineInfo(DAG, Level, false, this);
   1467 
   1468       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
   1469     }
   1470   }
   1471 
   1472   // If nothing happened still, try promoting the operation.
   1473   if (!RV.getNode()) {
   1474     switch (N->getOpcode()) {
   1475     default: break;
   1476     case ISD::ADD:
   1477     case ISD::SUB:
   1478     case ISD::MUL:
   1479     case ISD::AND:
   1480     case ISD::OR:
   1481     case ISD::XOR:
   1482       RV = PromoteIntBinOp(SDValue(N, 0));
   1483       break;
   1484     case ISD::SHL:
   1485     case ISD::SRA:
   1486     case ISD::SRL:
   1487       RV = PromoteIntShiftOp(SDValue(N, 0));
   1488       break;
   1489     case ISD::SIGN_EXTEND:
   1490     case ISD::ZERO_EXTEND:
   1491     case ISD::ANY_EXTEND:
   1492       RV = PromoteExtend(SDValue(N, 0));
   1493       break;
   1494     case ISD::LOAD:
   1495       if (PromoteLoad(SDValue(N, 0)))
   1496         RV = SDValue(N, 0);
   1497       break;
   1498     }
   1499   }
   1500 
   1501   // If N is a commutative binary node, try commuting it to enable more
   1502   // sdisel CSE.
   1503   if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
   1504       N->getNumValues() == 1) {
   1505     SDValue N0 = N->getOperand(0);
   1506     SDValue N1 = N->getOperand(1);
   1507 
   1508     // Constant operands are canonicalized to RHS.
   1509     if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
   1510       SDValue Ops[] = {N1, N0};
   1511       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
   1512                                             N->getFlags());
   1513       if (CSENode)
   1514         return SDValue(CSENode, 0);
   1515     }
   1516   }
   1517 
   1518   return RV;
   1519 }
   1520 
   1521 /// Given a node, return its input chain if it has one, otherwise return a null
   1522 /// sd operand.
   1523 static SDValue getInputChainForNode(SDNode *N) {
   1524   if (unsigned NumOps = N->getNumOperands()) {
   1525     if (N->getOperand(0).getValueType() == MVT::Other)
   1526       return N->getOperand(0);
   1527     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
   1528       return N->getOperand(NumOps-1);
   1529     for (unsigned i = 1; i < NumOps-1; ++i)
   1530       if (N->getOperand(i).getValueType() == MVT::Other)
   1531         return N->getOperand(i);
   1532   }
   1533   return SDValue();
   1534 }
   1535 
   1536 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
   1537   // If N has two operands, where one has an input chain equal to the other,
   1538   // the 'other' chain is redundant.
   1539   if (N->getNumOperands() == 2) {
   1540     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
   1541       return N->getOperand(0);
   1542     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
   1543       return N->getOperand(1);
   1544   }
   1545 
   1546   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
   1547   SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
   1548   SmallPtrSet<SDNode*, 16> SeenOps;
   1549   bool Changed = false;             // If we should replace this token factor.
   1550 
   1551   // Start out with this token factor.
   1552   TFs.push_back(N);
   1553 
   1554   // Iterate through token factors.  The TFs grows when new token factors are
   1555   // encountered.
   1556   for (unsigned i = 0; i < TFs.size(); ++i) {
   1557     SDNode *TF = TFs[i];
   1558 
   1559     // Check each of the operands.
   1560     for (const SDValue &Op : TF->op_values()) {
   1561 
   1562       switch (Op.getOpcode()) {
   1563       case ISD::EntryToken:
   1564         // Entry tokens don't need to be added to the list. They are
   1565         // redundant.
   1566         Changed = true;
   1567         break;
   1568 
   1569       case ISD::TokenFactor:
   1570         if (Op.hasOneUse() &&
   1571             std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
   1572           // Queue up for processing.
   1573           TFs.push_back(Op.getNode());
   1574           // Clean up in case the token factor is removed.
   1575           AddToWorklist(Op.getNode());
   1576           Changed = true;
   1577           break;
   1578         }
   1579         // Fall thru
   1580 
   1581       default:
   1582         // Only add if it isn't already in the list.
   1583         if (SeenOps.insert(Op.getNode()).second)
   1584           Ops.push_back(Op);
   1585         else
   1586           Changed = true;
   1587         break;
   1588       }
   1589     }
   1590   }
   1591 
   1592   SDValue Result;
   1593 
   1594   // If we've changed things around then replace token factor.
   1595   if (Changed) {
   1596     if (Ops.empty()) {
   1597       // The entry token is the only possible outcome.
   1598       Result = DAG.getEntryNode();
   1599     } else {
   1600       // New and improved token factor.
   1601       Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
   1602     }
   1603 
   1604     // Add users to worklist if AA is enabled, since it may introduce
   1605     // a lot of new chained token factors while removing memory deps.
   1606     bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
   1607       : DAG.getSubtarget().useAA();
   1608     return CombineTo(N, Result, UseAA /*add to worklist*/);
   1609   }
   1610 
   1611   return Result;
   1612 }
   1613 
   1614 /// MERGE_VALUES can always be eliminated.
   1615 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
   1616   WorklistRemover DeadNodes(*this);
   1617   // Replacing results may cause a different MERGE_VALUES to suddenly
   1618   // be CSE'd with N, and carry its uses with it. Iterate until no
   1619   // uses remain, to ensure that the node can be safely deleted.
   1620   // First add the users of this node to the work list so that they
   1621   // can be tried again once they have new operands.
   1622   AddUsersToWorklist(N);
   1623   do {
   1624     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
   1625       DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
   1626   } while (!N->use_empty());
   1627   deleteAndRecombine(N);
   1628   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   1629 }
   1630 
   1631 /// If \p N is a ContantSDNode with isOpaque() == false return it casted to a
   1632 /// ContantSDNode pointer else nullptr.
   1633 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
   1634   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
   1635   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
   1636 }
   1637 
   1638 SDValue DAGCombiner::visitADD(SDNode *N) {
   1639   SDValue N0 = N->getOperand(0);
   1640   SDValue N1 = N->getOperand(1);
   1641   EVT VT = N0.getValueType();
   1642 
   1643   // fold vector ops
   1644   if (VT.isVector()) {
   1645     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   1646       return FoldedVOp;
   1647 
   1648     // fold (add x, 0) -> x, vector edition
   1649     if (ISD::isBuildVectorAllZeros(N1.getNode()))
   1650       return N0;
   1651     if (ISD::isBuildVectorAllZeros(N0.getNode()))
   1652       return N1;
   1653   }
   1654 
   1655   // fold (add x, undef) -> undef
   1656   if (N0.getOpcode() == ISD::UNDEF)
   1657     return N0;
   1658   if (N1.getOpcode() == ISD::UNDEF)
   1659     return N1;
   1660   // fold (add c1, c2) -> c1+c2
   1661   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   1662   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
   1663   if (N0C && N1C)
   1664     return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C);
   1665   // canonicalize constant to RHS
   1666   if (isConstantIntBuildVectorOrConstantInt(N0) &&
   1667      !isConstantIntBuildVectorOrConstantInt(N1))
   1668     return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
   1669   // fold (add x, 0) -> x
   1670   if (isNullConstant(N1))
   1671     return N0;
   1672   // fold (add Sym, c) -> Sym+c
   1673   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
   1674     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
   1675         GA->getOpcode() == ISD::GlobalAddress)
   1676       return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
   1677                                   GA->getOffset() +
   1678                                     (uint64_t)N1C->getSExtValue());
   1679   // fold ((c1-A)+c2) -> (c1+c2)-A
   1680   if (N1C && N0.getOpcode() == ISD::SUB)
   1681     if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) {
   1682       SDLoc DL(N);
   1683       return DAG.getNode(ISD::SUB, DL, VT,
   1684                          DAG.getConstant(N1C->getAPIntValue()+
   1685                                          N0C->getAPIntValue(), DL, VT),
   1686                          N0.getOperand(1));
   1687     }
   1688   // reassociate add
   1689   if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
   1690     return RADD;
   1691   // fold ((0-A) + B) -> B-A
   1692   if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0)))
   1693     return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
   1694   // fold (A + (0-B)) -> A-B
   1695   if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0)))
   1696     return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
   1697   // fold (A+(B-A)) -> B
   1698   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
   1699     return N1.getOperand(0);
   1700   // fold ((B-A)+A) -> B
   1701   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
   1702     return N0.getOperand(0);
   1703   // fold (A+(B-(A+C))) to (B-C)
   1704   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
   1705       N0 == N1.getOperand(1).getOperand(0))
   1706     return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
   1707                        N1.getOperand(1).getOperand(1));
   1708   // fold (A+(B-(C+A))) to (B-C)
   1709   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
   1710       N0 == N1.getOperand(1).getOperand(1))
   1711     return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
   1712                        N1.getOperand(1).getOperand(0));
   1713   // fold (A+((B-A)+or-C)) to (B+or-C)
   1714   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
   1715       N1.getOperand(0).getOpcode() == ISD::SUB &&
   1716       N0 == N1.getOperand(0).getOperand(1))
   1717     return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
   1718                        N1.getOperand(0).getOperand(0), N1.getOperand(1));
   1719 
   1720   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
   1721   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
   1722     SDValue N00 = N0.getOperand(0);
   1723     SDValue N01 = N0.getOperand(1);
   1724     SDValue N10 = N1.getOperand(0);
   1725     SDValue N11 = N1.getOperand(1);
   1726 
   1727     if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
   1728       return DAG.getNode(ISD::SUB, SDLoc(N), VT,
   1729                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
   1730                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
   1731   }
   1732 
   1733   if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
   1734     return SDValue(N, 0);
   1735 
   1736   // fold (a+b) -> (a|b) iff a and b share no bits.
   1737   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
   1738       VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1))
   1739     return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
   1740 
   1741   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
   1742   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
   1743       isNullConstant(N1.getOperand(0).getOperand(0)))
   1744     return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
   1745                        DAG.getNode(ISD::SHL, SDLoc(N), VT,
   1746                                    N1.getOperand(0).getOperand(1),
   1747                                    N1.getOperand(1)));
   1748   if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
   1749       isNullConstant(N0.getOperand(0).getOperand(0)))
   1750     return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
   1751                        DAG.getNode(ISD::SHL, SDLoc(N), VT,
   1752                                    N0.getOperand(0).getOperand(1),
   1753                                    N0.getOperand(1)));
   1754 
   1755   if (N1.getOpcode() == ISD::AND) {
   1756     SDValue AndOp0 = N1.getOperand(0);
   1757     unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
   1758     unsigned DestBits = VT.getScalarType().getSizeInBits();
   1759 
   1760     // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
   1761     // and similar xforms where the inner op is either ~0 or 0.
   1762     if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) {
   1763       SDLoc DL(N);
   1764       return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
   1765     }
   1766   }
   1767 
   1768   // add (sext i1), X -> sub X, (zext i1)
   1769   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
   1770       N0.getOperand(0).getValueType() == MVT::i1 &&
   1771       !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
   1772     SDLoc DL(N);
   1773     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
   1774     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
   1775   }
   1776 
   1777   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
   1778   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
   1779     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
   1780     if (TN->getVT() == MVT::i1) {
   1781       SDLoc DL(N);
   1782       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
   1783                                  DAG.getConstant(1, DL, VT));
   1784       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
   1785     }
   1786   }
   1787 
   1788   return SDValue();
   1789 }
   1790 
   1791 SDValue DAGCombiner::visitADDC(SDNode *N) {
   1792   SDValue N0 = N->getOperand(0);
   1793   SDValue N1 = N->getOperand(1);
   1794   EVT VT = N0.getValueType();
   1795 
   1796   // If the flag result is dead, turn this into an ADD.
   1797   if (!N->hasAnyUseOfValue(1))
   1798     return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
   1799                      DAG.getNode(ISD::CARRY_FALSE,
   1800                                  SDLoc(N), MVT::Glue));
   1801 
   1802   // canonicalize constant to RHS.
   1803   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   1804   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   1805   if (N0C && !N1C)
   1806     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
   1807 
   1808   // fold (addc x, 0) -> x + no carry out
   1809   if (isNullConstant(N1))
   1810     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
   1811                                         SDLoc(N), MVT::Glue));
   1812 
   1813   // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
   1814   APInt LHSZero, LHSOne;
   1815   APInt RHSZero, RHSOne;
   1816   DAG.computeKnownBits(N0, LHSZero, LHSOne);
   1817 
   1818   if (LHSZero.getBoolValue()) {
   1819     DAG.computeKnownBits(N1, RHSZero, RHSOne);
   1820 
   1821     // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
   1822     // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
   1823     if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
   1824       return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
   1825                        DAG.getNode(ISD::CARRY_FALSE,
   1826                                    SDLoc(N), MVT::Glue));
   1827   }
   1828 
   1829   return SDValue();
   1830 }
   1831 
   1832 SDValue DAGCombiner::visitADDE(SDNode *N) {
   1833   SDValue N0 = N->getOperand(0);
   1834   SDValue N1 = N->getOperand(1);
   1835   SDValue CarryIn = N->getOperand(2);
   1836 
   1837   // canonicalize constant to RHS
   1838   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   1839   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   1840   if (N0C && !N1C)
   1841     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
   1842                        N1, N0, CarryIn);
   1843 
   1844   // fold (adde x, y, false) -> (addc x, y)
   1845   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
   1846     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
   1847 
   1848   return SDValue();
   1849 }
   1850 
   1851 // Since it may not be valid to emit a fold to zero for vector initializers
   1852 // check if we can before folding.
   1853 static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
   1854                              SelectionDAG &DAG,
   1855                              bool LegalOperations, bool LegalTypes) {
   1856   if (!VT.isVector())
   1857     return DAG.getConstant(0, DL, VT);
   1858   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
   1859     return DAG.getConstant(0, DL, VT);
   1860   return SDValue();
   1861 }
   1862 
   1863 SDValue DAGCombiner::visitSUB(SDNode *N) {
   1864   SDValue N0 = N->getOperand(0);
   1865   SDValue N1 = N->getOperand(1);
   1866   EVT VT = N0.getValueType();
   1867 
   1868   // fold vector ops
   1869   if (VT.isVector()) {
   1870     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   1871       return FoldedVOp;
   1872 
   1873     // fold (sub x, 0) -> x, vector edition
   1874     if (ISD::isBuildVectorAllZeros(N1.getNode()))
   1875       return N0;
   1876   }
   1877 
   1878   // fold (sub x, x) -> 0
   1879   // FIXME: Refactor this and xor and other similar operations together.
   1880   if (N0 == N1)
   1881     return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
   1882   // fold (sub c1, c2) -> c1-c2
   1883   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   1884   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
   1885   if (N0C && N1C)
   1886     return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C);
   1887   // fold (sub x, c) -> (add x, -c)
   1888   if (N1C) {
   1889     SDLoc DL(N);
   1890     return DAG.getNode(ISD::ADD, DL, VT, N0,
   1891                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
   1892   }
   1893   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
   1894   if (isAllOnesConstant(N0))
   1895     return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
   1896   // fold A-(A-B) -> B
   1897   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
   1898     return N1.getOperand(1);
   1899   // fold (A+B)-A -> B
   1900   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
   1901     return N0.getOperand(1);
   1902   // fold (A+B)-B -> A
   1903   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
   1904     return N0.getOperand(0);
   1905   // fold C2-(A+C1) -> (C2-C1)-A
   1906   ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
   1907     dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
   1908   if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
   1909     SDLoc DL(N);
   1910     SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
   1911                                    DL, VT);
   1912     return DAG.getNode(ISD::SUB, DL, VT, NewC,
   1913                        N1.getOperand(0));
   1914   }
   1915   // fold ((A+(B+or-C))-B) -> A+or-C
   1916   if (N0.getOpcode() == ISD::ADD &&
   1917       (N0.getOperand(1).getOpcode() == ISD::SUB ||
   1918        N0.getOperand(1).getOpcode() == ISD::ADD) &&
   1919       N0.getOperand(1).getOperand(0) == N1)
   1920     return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
   1921                        N0.getOperand(0), N0.getOperand(1).getOperand(1));
   1922   // fold ((A+(C+B))-B) -> A+C
   1923   if (N0.getOpcode() == ISD::ADD &&
   1924       N0.getOperand(1).getOpcode() == ISD::ADD &&
   1925       N0.getOperand(1).getOperand(1) == N1)
   1926     return DAG.getNode(ISD::ADD, SDLoc(N), VT,
   1927                        N0.getOperand(0), N0.getOperand(1).getOperand(0));
   1928   // fold ((A-(B-C))-C) -> A-B
   1929   if (N0.getOpcode() == ISD::SUB &&
   1930       N0.getOperand(1).getOpcode() == ISD::SUB &&
   1931       N0.getOperand(1).getOperand(1) == N1)
   1932     return DAG.getNode(ISD::SUB, SDLoc(N), VT,
   1933                        N0.getOperand(0), N0.getOperand(1).getOperand(0));
   1934 
   1935   // If either operand of a sub is undef, the result is undef
   1936   if (N0.getOpcode() == ISD::UNDEF)
   1937     return N0;
   1938   if (N1.getOpcode() == ISD::UNDEF)
   1939     return N1;
   1940 
   1941   // If the relocation model supports it, consider symbol offsets.
   1942   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
   1943     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
   1944       // fold (sub Sym, c) -> Sym-c
   1945       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
   1946         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
   1947                                     GA->getOffset() -
   1948                                       (uint64_t)N1C->getSExtValue());
   1949       // fold (sub Sym+c1, Sym+c2) -> c1-c2
   1950       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
   1951         if (GA->getGlobal() == GB->getGlobal())
   1952           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
   1953                                  SDLoc(N), VT);
   1954     }
   1955 
   1956   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
   1957   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
   1958     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
   1959     if (TN->getVT() == MVT::i1) {
   1960       SDLoc DL(N);
   1961       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
   1962                                  DAG.getConstant(1, DL, VT));
   1963       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
   1964     }
   1965   }
   1966 
   1967   return SDValue();
   1968 }
   1969 
   1970 SDValue DAGCombiner::visitSUBC(SDNode *N) {
   1971   SDValue N0 = N->getOperand(0);
   1972   SDValue N1 = N->getOperand(1);
   1973   EVT VT = N0.getValueType();
   1974   SDLoc DL(N);
   1975 
   1976   // If the flag result is dead, turn this into an SUB.
   1977   if (!N->hasAnyUseOfValue(1))
   1978     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
   1979                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
   1980 
   1981   // fold (subc x, x) -> 0 + no borrow
   1982   if (N0 == N1)
   1983     return CombineTo(N, DAG.getConstant(0, DL, VT),
   1984                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
   1985 
   1986   // fold (subc x, 0) -> x + no borrow
   1987   if (isNullConstant(N1))
   1988     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
   1989 
   1990   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
   1991   if (isAllOnesConstant(N0))
   1992     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
   1993                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
   1994 
   1995   return SDValue();
   1996 }
   1997 
   1998 SDValue DAGCombiner::visitSUBE(SDNode *N) {
   1999   SDValue N0 = N->getOperand(0);
   2000   SDValue N1 = N->getOperand(1);
   2001   SDValue CarryIn = N->getOperand(2);
   2002 
   2003   // fold (sube x, y, false) -> (subc x, y)
   2004   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
   2005     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
   2006 
   2007   return SDValue();
   2008 }
   2009 
   2010 SDValue DAGCombiner::visitMUL(SDNode *N) {
   2011   SDValue N0 = N->getOperand(0);
   2012   SDValue N1 = N->getOperand(1);
   2013   EVT VT = N0.getValueType();
   2014 
   2015   // fold (mul x, undef) -> 0
   2016   if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
   2017     return DAG.getConstant(0, SDLoc(N), VT);
   2018 
   2019   bool N0IsConst = false;
   2020   bool N1IsConst = false;
   2021   bool N1IsOpaqueConst = false;
   2022   bool N0IsOpaqueConst = false;
   2023   APInt ConstValue0, ConstValue1;
   2024   // fold vector ops
   2025   if (VT.isVector()) {
   2026     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   2027       return FoldedVOp;
   2028 
   2029     N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
   2030     N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
   2031   } else {
   2032     N0IsConst = isa<ConstantSDNode>(N0);
   2033     if (N0IsConst) {
   2034       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
   2035       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
   2036     }
   2037     N1IsConst = isa<ConstantSDNode>(N1);
   2038     if (N1IsConst) {
   2039       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
   2040       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
   2041     }
   2042   }
   2043 
   2044   // fold (mul c1, c2) -> c1*c2
   2045   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
   2046     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
   2047                                       N0.getNode(), N1.getNode());
   2048 
   2049   // canonicalize constant to RHS (vector doesn't have to splat)
   2050   if (isConstantIntBuildVectorOrConstantInt(N0) &&
   2051      !isConstantIntBuildVectorOrConstantInt(N1))
   2052     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
   2053   // fold (mul x, 0) -> 0
   2054   if (N1IsConst && ConstValue1 == 0)
   2055     return N1;
   2056   // We require a splat of the entire scalar bit width for non-contiguous
   2057   // bit patterns.
   2058   bool IsFullSplat =
   2059     ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
   2060   // fold (mul x, 1) -> x
   2061   if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
   2062     return N0;
   2063   // fold (mul x, -1) -> 0-x
   2064   if (N1IsConst && ConstValue1.isAllOnesValue()) {
   2065     SDLoc DL(N);
   2066     return DAG.getNode(ISD::SUB, DL, VT,
   2067                        DAG.getConstant(0, DL, VT), N0);
   2068   }
   2069   // fold (mul x, (1 << c)) -> x << c
   2070   if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
   2071       IsFullSplat) {
   2072     SDLoc DL(N);
   2073     return DAG.getNode(ISD::SHL, DL, VT, N0,
   2074                        DAG.getConstant(ConstValue1.logBase2(), DL,
   2075                                        getShiftAmountTy(N0.getValueType())));
   2076   }
   2077   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
   2078   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
   2079       IsFullSplat) {
   2080     unsigned Log2Val = (-ConstValue1).logBase2();
   2081     SDLoc DL(N);
   2082     // FIXME: If the input is something that is easily negated (e.g. a
   2083     // single-use add), we should put the negate there.
   2084     return DAG.getNode(ISD::SUB, DL, VT,
   2085                        DAG.getConstant(0, DL, VT),
   2086                        DAG.getNode(ISD::SHL, DL, VT, N0,
   2087                             DAG.getConstant(Log2Val, DL,
   2088                                       getShiftAmountTy(N0.getValueType()))));
   2089   }
   2090 
   2091   APInt Val;
   2092   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
   2093   if (N1IsConst && N0.getOpcode() == ISD::SHL &&
   2094       (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
   2095                      isa<ConstantSDNode>(N0.getOperand(1)))) {
   2096     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
   2097                              N1, N0.getOperand(1));
   2098     AddToWorklist(C3.getNode());
   2099     return DAG.getNode(ISD::MUL, SDLoc(N), VT,
   2100                        N0.getOperand(0), C3);
   2101   }
   2102 
   2103   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
   2104   // use.
   2105   {
   2106     SDValue Sh(nullptr,0), Y(nullptr,0);
   2107     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
   2108     if (N0.getOpcode() == ISD::SHL &&
   2109         (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
   2110                        isa<ConstantSDNode>(N0.getOperand(1))) &&
   2111         N0.getNode()->hasOneUse()) {
   2112       Sh = N0; Y = N1;
   2113     } else if (N1.getOpcode() == ISD::SHL &&
   2114                isa<ConstantSDNode>(N1.getOperand(1)) &&
   2115                N1.getNode()->hasOneUse()) {
   2116       Sh = N1; Y = N0;
   2117     }
   2118 
   2119     if (Sh.getNode()) {
   2120       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
   2121                                 Sh.getOperand(0), Y);
   2122       return DAG.getNode(ISD::SHL, SDLoc(N), VT,
   2123                          Mul, Sh.getOperand(1));
   2124     }
   2125   }
   2126 
   2127   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
   2128   if (isConstantIntBuildVectorOrConstantInt(N1) &&
   2129       N0.getOpcode() == ISD::ADD &&
   2130       isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
   2131       isMulAddWithConstProfitable(N, N0, N1))
   2132       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
   2133                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
   2134                                      N0.getOperand(0), N1),
   2135                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
   2136                                      N0.getOperand(1), N1));
   2137 
   2138   // reassociate mul
   2139   if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
   2140     return RMUL;
   2141 
   2142   return SDValue();
   2143 }
   2144 
   2145 /// Return true if divmod libcall is available.
   2146 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
   2147                                      const TargetLowering &TLI) {
   2148   RTLIB::Libcall LC;
   2149   switch (Node->getSimpleValueType(0).SimpleTy) {
   2150   default: return false; // No libcall for vector types.
   2151   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
   2152   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
   2153   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
   2154   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
   2155   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
   2156   }
   2157 
   2158   return TLI.getLibcallName(LC) != nullptr;
   2159 }
   2160 
   2161 /// Issue divrem if both quotient and remainder are needed.
   2162 SDValue DAGCombiner::useDivRem(SDNode *Node) {
   2163   if (Node->use_empty())
   2164     return SDValue(); // This is a dead node, leave it alone.
   2165 
   2166   EVT VT = Node->getValueType(0);
   2167   if (!TLI.isTypeLegal(VT))
   2168     return SDValue();
   2169 
   2170   unsigned Opcode = Node->getOpcode();
   2171   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
   2172 
   2173   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
   2174   // If DIVREM is going to get expanded into a libcall,
   2175   // but there is no libcall available, then don't combine.
   2176   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
   2177       !isDivRemLibcallAvailable(Node, isSigned, TLI))
   2178     return SDValue();
   2179 
   2180   // If div is legal, it's better to do the normal expansion
   2181   unsigned OtherOpcode = 0;
   2182   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
   2183     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
   2184     if (TLI.isOperationLegalOrCustom(Opcode, VT))
   2185       return SDValue();
   2186   } else {
   2187     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
   2188     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
   2189       return SDValue();
   2190   }
   2191 
   2192   SDValue Op0 = Node->getOperand(0);
   2193   SDValue Op1 = Node->getOperand(1);
   2194   SDValue combined;
   2195   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
   2196          UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
   2197     SDNode *User = *UI;
   2198     if (User == Node || User->use_empty())
   2199       continue;
   2200     // Convert the other matching node(s), too;
   2201     // otherwise, the DIVREM may get target-legalized into something
   2202     // target-specific that we won't be able to recognize.
   2203     unsigned UserOpc = User->getOpcode();
   2204     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
   2205         User->getOperand(0) == Op0 &&
   2206         User->getOperand(1) == Op1) {
   2207       if (!combined) {
   2208         if (UserOpc == OtherOpcode) {
   2209           SDVTList VTs = DAG.getVTList(VT, VT);
   2210           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
   2211         } else if (UserOpc == DivRemOpc) {
   2212           combined = SDValue(User, 0);
   2213         } else {
   2214           assert(UserOpc == Opcode);
   2215           continue;
   2216         }
   2217       }
   2218       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
   2219         CombineTo(User, combined);
   2220       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
   2221         CombineTo(User, combined.getValue(1));
   2222     }
   2223   }
   2224   return combined;
   2225 }
   2226 
   2227 SDValue DAGCombiner::visitSDIV(SDNode *N) {
   2228   SDValue N0 = N->getOperand(0);
   2229   SDValue N1 = N->getOperand(1);
   2230   EVT VT = N->getValueType(0);
   2231 
   2232   // fold vector ops
   2233   if (VT.isVector())
   2234     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   2235       return FoldedVOp;
   2236 
   2237   SDLoc DL(N);
   2238 
   2239   // fold (sdiv c1, c2) -> c1/c2
   2240   ConstantSDNode *N0C = isConstOrConstSplat(N0);
   2241   ConstantSDNode *N1C = isConstOrConstSplat(N1);
   2242   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
   2243     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
   2244   // fold (sdiv X, 1) -> X
   2245   if (N1C && N1C->isOne())
   2246     return N0;
   2247   // fold (sdiv X, -1) -> 0-X
   2248   if (N1C && N1C->isAllOnesValue())
   2249     return DAG.getNode(ISD::SUB, DL, VT,
   2250                        DAG.getConstant(0, DL, VT), N0);
   2251 
   2252   // If we know the sign bits of both operands are zero, strength reduce to a
   2253   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
   2254   if (!VT.isVector()) {
   2255     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
   2256       return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
   2257   }
   2258 
   2259   // fold (sdiv X, pow2) -> simple ops after legalize
   2260   // FIXME: We check for the exact bit here because the generic lowering gives
   2261   // better results in that case. The target-specific lowering should learn how
   2262   // to handle exact sdivs efficiently.
   2263   if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
   2264       !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
   2265       (N1C->getAPIntValue().isPowerOf2() ||
   2266        (-N1C->getAPIntValue()).isPowerOf2())) {
   2267     // Target-specific implementation of sdiv x, pow2.
   2268     if (SDValue Res = BuildSDIVPow2(N))
   2269       return Res;
   2270 
   2271     unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
   2272 
   2273     // Splat the sign bit into the register
   2274     SDValue SGN =
   2275         DAG.getNode(ISD::SRA, DL, VT, N0,
   2276                     DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
   2277                                     getShiftAmountTy(N0.getValueType())));
   2278     AddToWorklist(SGN.getNode());
   2279 
   2280     // Add (N0 < 0) ? abs2 - 1 : 0;
   2281     SDValue SRL =
   2282         DAG.getNode(ISD::SRL, DL, VT, SGN,
   2283                     DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
   2284                                     getShiftAmountTy(SGN.getValueType())));
   2285     SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
   2286     AddToWorklist(SRL.getNode());
   2287     AddToWorklist(ADD.getNode());    // Divide by pow2
   2288     SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
   2289                   DAG.getConstant(lg2, DL,
   2290                                   getShiftAmountTy(ADD.getValueType())));
   2291 
   2292     // If we're dividing by a positive value, we're done.  Otherwise, we must
   2293     // negate the result.
   2294     if (N1C->getAPIntValue().isNonNegative())
   2295       return SRA;
   2296 
   2297     AddToWorklist(SRA.getNode());
   2298     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
   2299   }
   2300 
   2301   // If integer divide is expensive and we satisfy the requirements, emit an
   2302   // alternate sequence.  Targets may check function attributes for size/speed
   2303   // trade-offs.
   2304   AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
   2305   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
   2306     if (SDValue Op = BuildSDIV(N))
   2307       return Op;
   2308 
   2309   // sdiv, srem -> sdivrem
   2310   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
   2311   // Otherwise, we break the simplification logic in visitREM().
   2312   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
   2313     if (SDValue DivRem = useDivRem(N))
   2314         return DivRem;
   2315 
   2316   // undef / X -> 0
   2317   if (N0.getOpcode() == ISD::UNDEF)
   2318     return DAG.getConstant(0, DL, VT);
   2319   // X / undef -> undef
   2320   if (N1.getOpcode() == ISD::UNDEF)
   2321     return N1;
   2322 
   2323   return SDValue();
   2324 }
   2325 
   2326 SDValue DAGCombiner::visitUDIV(SDNode *N) {
   2327   SDValue N0 = N->getOperand(0);
   2328   SDValue N1 = N->getOperand(1);
   2329   EVT VT = N->getValueType(0);
   2330 
   2331   // fold vector ops
   2332   if (VT.isVector())
   2333     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   2334       return FoldedVOp;
   2335 
   2336   SDLoc DL(N);
   2337 
   2338   // fold (udiv c1, c2) -> c1/c2
   2339   ConstantSDNode *N0C = isConstOrConstSplat(N0);
   2340   ConstantSDNode *N1C = isConstOrConstSplat(N1);
   2341   if (N0C && N1C)
   2342     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
   2343                                                     N0C, N1C))
   2344       return Folded;
   2345   // fold (udiv x, (1 << c)) -> x >>u c
   2346   if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2())
   2347     return DAG.getNode(ISD::SRL, DL, VT, N0,
   2348                        DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
   2349                                        getShiftAmountTy(N0.getValueType())));
   2350 
   2351   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
   2352   if (N1.getOpcode() == ISD::SHL) {
   2353     if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
   2354       if (SHC->getAPIntValue().isPowerOf2()) {
   2355         EVT ADDVT = N1.getOperand(1).getValueType();
   2356         SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
   2357                                   N1.getOperand(1),
   2358                                   DAG.getConstant(SHC->getAPIntValue()
   2359                                                                   .logBase2(),
   2360                                                   DL, ADDVT));
   2361         AddToWorklist(Add.getNode());
   2362         return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
   2363       }
   2364     }
   2365   }
   2366 
   2367   // fold (udiv x, c) -> alternate
   2368   AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
   2369   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
   2370     if (SDValue Op = BuildUDIV(N))
   2371       return Op;
   2372 
   2373   // sdiv, srem -> sdivrem
   2374   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
   2375   // Otherwise, we break the simplification logic in visitREM().
   2376   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
   2377     if (SDValue DivRem = useDivRem(N))
   2378         return DivRem;
   2379 
   2380   // undef / X -> 0
   2381   if (N0.getOpcode() == ISD::UNDEF)
   2382     return DAG.getConstant(0, DL, VT);
   2383   // X / undef -> undef
   2384   if (N1.getOpcode() == ISD::UNDEF)
   2385     return N1;
   2386 
   2387   return SDValue();
   2388 }
   2389 
   2390 // handles ISD::SREM and ISD::UREM
   2391 SDValue DAGCombiner::visitREM(SDNode *N) {
   2392   unsigned Opcode = N->getOpcode();
   2393   SDValue N0 = N->getOperand(0);
   2394   SDValue N1 = N->getOperand(1);
   2395   EVT VT = N->getValueType(0);
   2396   bool isSigned = (Opcode == ISD::SREM);
   2397   SDLoc DL(N);
   2398 
   2399   // fold (rem c1, c2) -> c1%c2
   2400   ConstantSDNode *N0C = isConstOrConstSplat(N0);
   2401   ConstantSDNode *N1C = isConstOrConstSplat(N1);
   2402   if (N0C && N1C)
   2403     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
   2404       return Folded;
   2405 
   2406   if (isSigned) {
   2407     // If we know the sign bits of both operands are zero, strength reduce to a
   2408     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
   2409     if (!VT.isVector()) {
   2410       if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
   2411         return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
   2412     }
   2413   } else {
   2414     // fold (urem x, pow2) -> (and x, pow2-1)
   2415     if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
   2416         N1C->getAPIntValue().isPowerOf2()) {
   2417       return DAG.getNode(ISD::AND, DL, VT, N0,
   2418                          DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
   2419     }
   2420     // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
   2421     if (N1.getOpcode() == ISD::SHL) {
   2422       if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
   2423         if (SHC->getAPIntValue().isPowerOf2()) {
   2424           SDValue Add =
   2425             DAG.getNode(ISD::ADD, DL, VT, N1,
   2426                  DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL,
   2427                                  VT));
   2428           AddToWorklist(Add.getNode());
   2429           return DAG.getNode(ISD::AND, DL, VT, N0, Add);
   2430         }
   2431       }
   2432     }
   2433   }
   2434 
   2435   AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
   2436 
   2437   // If X/C can be simplified by the division-by-constant logic, lower
   2438   // X%C to the equivalent of X-X/C*C.
   2439   // To avoid mangling nodes, this simplification requires that the combine()
   2440   // call for the speculative DIV must not cause a DIVREM conversion.  We guard
   2441   // against this by skipping the simplification if isIntDivCheap().  When
   2442   // div is not cheap, combine will not return a DIVREM.  Regardless,
   2443   // checking cheapness here makes sense since the simplification results in
   2444   // fatter code.
   2445   if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
   2446     unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
   2447     SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
   2448     AddToWorklist(Div.getNode());
   2449     SDValue OptimizedDiv = combine(Div.getNode());
   2450     if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
   2451       assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
   2452              (OptimizedDiv.getOpcode() != ISD::SDIVREM));
   2453       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
   2454       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
   2455       AddToWorklist(Mul.getNode());
   2456       return Sub;
   2457     }
   2458   }
   2459 
   2460   // sdiv, srem -> sdivrem
   2461   if (SDValue DivRem = useDivRem(N))
   2462     return DivRem.getValue(1);
   2463 
   2464   // undef % X -> 0
   2465   if (N0.getOpcode() == ISD::UNDEF)
   2466     return DAG.getConstant(0, DL, VT);
   2467   // X % undef -> undef
   2468   if (N1.getOpcode() == ISD::UNDEF)
   2469     return N1;
   2470 
   2471   return SDValue();
   2472 }
   2473 
   2474 SDValue DAGCombiner::visitMULHS(SDNode *N) {
   2475   SDValue N0 = N->getOperand(0);
   2476   SDValue N1 = N->getOperand(1);
   2477   EVT VT = N->getValueType(0);
   2478   SDLoc DL(N);
   2479 
   2480   // fold (mulhs x, 0) -> 0
   2481   if (isNullConstant(N1))
   2482     return N1;
   2483   // fold (mulhs x, 1) -> (sra x, size(x)-1)
   2484   if (isOneConstant(N1)) {
   2485     SDLoc DL(N);
   2486     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
   2487                        DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
   2488                                        DL,
   2489                                        getShiftAmountTy(N0.getValueType())));
   2490   }
   2491   // fold (mulhs x, undef) -> 0
   2492   if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
   2493     return DAG.getConstant(0, SDLoc(N), VT);
   2494 
   2495   // If the type twice as wide is legal, transform the mulhs to a wider multiply
   2496   // plus a shift.
   2497   if (VT.isSimple() && !VT.isVector()) {
   2498     MVT Simple = VT.getSimpleVT();
   2499     unsigned SimpleSize = Simple.getSizeInBits();
   2500     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
   2501     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
   2502       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
   2503       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
   2504       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
   2505       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
   2506             DAG.getConstant(SimpleSize, DL,
   2507                             getShiftAmountTy(N1.getValueType())));
   2508       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
   2509     }
   2510   }
   2511 
   2512   return SDValue();
   2513 }
   2514 
   2515 SDValue DAGCombiner::visitMULHU(SDNode *N) {
   2516   SDValue N0 = N->getOperand(0);
   2517   SDValue N1 = N->getOperand(1);
   2518   EVT VT = N->getValueType(0);
   2519   SDLoc DL(N);
   2520 
   2521   // fold (mulhu x, 0) -> 0
   2522   if (isNullConstant(N1))
   2523     return N1;
   2524   // fold (mulhu x, 1) -> 0
   2525   if (isOneConstant(N1))
   2526     return DAG.getConstant(0, DL, N0.getValueType());
   2527   // fold (mulhu x, undef) -> 0
   2528   if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
   2529     return DAG.getConstant(0, DL, VT);
   2530 
   2531   // If the type twice as wide is legal, transform the mulhu to a wider multiply
   2532   // plus a shift.
   2533   if (VT.isSimple() && !VT.isVector()) {
   2534     MVT Simple = VT.getSimpleVT();
   2535     unsigned SimpleSize = Simple.getSizeInBits();
   2536     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
   2537     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
   2538       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
   2539       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
   2540       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
   2541       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
   2542             DAG.getConstant(SimpleSize, DL,
   2543                             getShiftAmountTy(N1.getValueType())));
   2544       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
   2545     }
   2546   }
   2547 
   2548   return SDValue();
   2549 }
   2550 
   2551 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
   2552 /// give the opcodes for the two computations that are being performed. Return
   2553 /// true if a simplification was made.
   2554 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
   2555                                                 unsigned HiOp) {
   2556   // If the high half is not needed, just compute the low half.
   2557   bool HiExists = N->hasAnyUseOfValue(1);
   2558   if (!HiExists &&
   2559       (!LegalOperations ||
   2560        TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
   2561     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
   2562     return CombineTo(N, Res, Res);
   2563   }
   2564 
   2565   // If the low half is not needed, just compute the high half.
   2566   bool LoExists = N->hasAnyUseOfValue(0);
   2567   if (!LoExists &&
   2568       (!LegalOperations ||
   2569        TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
   2570     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
   2571     return CombineTo(N, Res, Res);
   2572   }
   2573 
   2574   // If both halves are used, return as it is.
   2575   if (LoExists && HiExists)
   2576     return SDValue();
   2577 
   2578   // If the two computed results can be simplified separately, separate them.
   2579   if (LoExists) {
   2580     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
   2581     AddToWorklist(Lo.getNode());
   2582     SDValue LoOpt = combine(Lo.getNode());
   2583     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
   2584         (!LegalOperations ||
   2585          TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
   2586       return CombineTo(N, LoOpt, LoOpt);
   2587   }
   2588 
   2589   if (HiExists) {
   2590     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
   2591     AddToWorklist(Hi.getNode());
   2592     SDValue HiOpt = combine(Hi.getNode());
   2593     if (HiOpt.getNode() && HiOpt != Hi &&
   2594         (!LegalOperations ||
   2595          TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
   2596       return CombineTo(N, HiOpt, HiOpt);
   2597   }
   2598 
   2599   return SDValue();
   2600 }
   2601 
   2602 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
   2603   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
   2604     return Res;
   2605 
   2606   EVT VT = N->getValueType(0);
   2607   SDLoc DL(N);
   2608 
   2609   // If the type is twice as wide is legal, transform the mulhu to a wider
   2610   // multiply plus a shift.
   2611   if (VT.isSimple() && !VT.isVector()) {
   2612     MVT Simple = VT.getSimpleVT();
   2613     unsigned SimpleSize = Simple.getSizeInBits();
   2614     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
   2615     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
   2616       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
   2617       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
   2618       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
   2619       // Compute the high part as N1.
   2620       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
   2621             DAG.getConstant(SimpleSize, DL,
   2622                             getShiftAmountTy(Lo.getValueType())));
   2623       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
   2624       // Compute the low part as N0.
   2625       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
   2626       return CombineTo(N, Lo, Hi);
   2627     }
   2628   }
   2629 
   2630   return SDValue();
   2631 }
   2632 
   2633 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
   2634   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
   2635     return Res;
   2636 
   2637   EVT VT = N->getValueType(0);
   2638   SDLoc DL(N);
   2639 
   2640   // If the type is twice as wide is legal, transform the mulhu to a wider
   2641   // multiply plus a shift.
   2642   if (VT.isSimple() && !VT.isVector()) {
   2643     MVT Simple = VT.getSimpleVT();
   2644     unsigned SimpleSize = Simple.getSizeInBits();
   2645     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
   2646     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
   2647       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
   2648       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
   2649       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
   2650       // Compute the high part as N1.
   2651       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
   2652             DAG.getConstant(SimpleSize, DL,
   2653                             getShiftAmountTy(Lo.getValueType())));
   2654       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
   2655       // Compute the low part as N0.
   2656       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
   2657       return CombineTo(N, Lo, Hi);
   2658     }
   2659   }
   2660 
   2661   return SDValue();
   2662 }
   2663 
   2664 SDValue DAGCombiner::visitSMULO(SDNode *N) {
   2665   // (smulo x, 2) -> (saddo x, x)
   2666   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
   2667     if (C2->getAPIntValue() == 2)
   2668       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
   2669                          N->getOperand(0), N->getOperand(0));
   2670 
   2671   return SDValue();
   2672 }
   2673 
   2674 SDValue DAGCombiner::visitUMULO(SDNode *N) {
   2675   // (umulo x, 2) -> (uaddo x, x)
   2676   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
   2677     if (C2->getAPIntValue() == 2)
   2678       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
   2679                          N->getOperand(0), N->getOperand(0));
   2680 
   2681   return SDValue();
   2682 }
   2683 
   2684 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
   2685   SDValue N0 = N->getOperand(0);
   2686   SDValue N1 = N->getOperand(1);
   2687   EVT VT = N0.getValueType();
   2688 
   2689   // fold vector ops
   2690   if (VT.isVector())
   2691     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   2692       return FoldedVOp;
   2693 
   2694   // fold (add c1, c2) -> c1+c2
   2695   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   2696   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
   2697   if (N0C && N1C)
   2698     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
   2699 
   2700   // canonicalize constant to RHS
   2701   if (isConstantIntBuildVectorOrConstantInt(N0) &&
   2702      !isConstantIntBuildVectorOrConstantInt(N1))
   2703     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
   2704 
   2705   return SDValue();
   2706 }
   2707 
   2708 /// If this is a binary operator with two operands of the same opcode, try to
   2709 /// simplify it.
   2710 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
   2711   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
   2712   EVT VT = N0.getValueType();
   2713   assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
   2714 
   2715   // Bail early if none of these transforms apply.
   2716   if (N0.getNode()->getNumOperands() == 0) return SDValue();
   2717 
   2718   // For each of OP in AND/OR/XOR:
   2719   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
   2720   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
   2721   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
   2722   // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
   2723   // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
   2724   //
   2725   // do not sink logical op inside of a vector extend, since it may combine
   2726   // into a vsetcc.
   2727   EVT Op0VT = N0.getOperand(0).getValueType();
   2728   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
   2729        N0.getOpcode() == ISD::SIGN_EXTEND ||
   2730        N0.getOpcode() == ISD::BSWAP ||
   2731        // Avoid infinite looping with PromoteIntBinOp.
   2732        (N0.getOpcode() == ISD::ANY_EXTEND &&
   2733         (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
   2734        (N0.getOpcode() == ISD::TRUNCATE &&
   2735         (!TLI.isZExtFree(VT, Op0VT) ||
   2736          !TLI.isTruncateFree(Op0VT, VT)) &&
   2737         TLI.isTypeLegal(Op0VT))) &&
   2738       !VT.isVector() &&
   2739       Op0VT == N1.getOperand(0).getValueType() &&
   2740       (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
   2741     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
   2742                                  N0.getOperand(0).getValueType(),
   2743                                  N0.getOperand(0), N1.getOperand(0));
   2744     AddToWorklist(ORNode.getNode());
   2745     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
   2746   }
   2747 
   2748   // For each of OP in SHL/SRL/SRA/AND...
   2749   //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
   2750   //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
   2751   //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
   2752   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
   2753        N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
   2754       N0.getOperand(1) == N1.getOperand(1)) {
   2755     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
   2756                                  N0.getOperand(0).getValueType(),
   2757                                  N0.getOperand(0), N1.getOperand(0));
   2758     AddToWorklist(ORNode.getNode());
   2759     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
   2760                        ORNode, N0.getOperand(1));
   2761   }
   2762 
   2763   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
   2764   // Only perform this optimization after type legalization and before
   2765   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
   2766   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
   2767   // we don't want to undo this promotion.
   2768   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
   2769   // on scalars.
   2770   if ((N0.getOpcode() == ISD::BITCAST ||
   2771        N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
   2772       Level == AfterLegalizeTypes) {
   2773     SDValue In0 = N0.getOperand(0);
   2774     SDValue In1 = N1.getOperand(0);
   2775     EVT In0Ty = In0.getValueType();
   2776     EVT In1Ty = In1.getValueType();
   2777     SDLoc DL(N);
   2778     // If both incoming values are integers, and the original types are the
   2779     // same.
   2780     if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
   2781       SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
   2782       SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
   2783       AddToWorklist(Op.getNode());
   2784       return BC;
   2785     }
   2786   }
   2787 
   2788   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
   2789   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
   2790   // If both shuffles use the same mask, and both shuffle within a single
   2791   // vector, then it is worthwhile to move the swizzle after the operation.
   2792   // The type-legalizer generates this pattern when loading illegal
   2793   // vector types from memory. In many cases this allows additional shuffle
   2794   // optimizations.
   2795   // There are other cases where moving the shuffle after the xor/and/or
   2796   // is profitable even if shuffles don't perform a swizzle.
   2797   // If both shuffles use the same mask, and both shuffles have the same first
   2798   // or second operand, then it might still be profitable to move the shuffle
   2799   // after the xor/and/or operation.
   2800   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
   2801     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
   2802     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
   2803 
   2804     assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
   2805            "Inputs to shuffles are not the same type");
   2806 
   2807     // Check that both shuffles use the same mask. The masks are known to be of
   2808     // the same length because the result vector type is the same.
   2809     // Check also that shuffles have only one use to avoid introducing extra
   2810     // instructions.
   2811     if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
   2812         SVN0->getMask().equals(SVN1->getMask())) {
   2813       SDValue ShOp = N0->getOperand(1);
   2814 
   2815       // Don't try to fold this node if it requires introducing a
   2816       // build vector of all zeros that might be illegal at this stage.
   2817       if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
   2818         if (!LegalTypes)
   2819           ShOp = DAG.getConstant(0, SDLoc(N), VT);
   2820         else
   2821           ShOp = SDValue();
   2822       }
   2823 
   2824       // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
   2825       // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
   2826       // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
   2827       if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
   2828         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
   2829                                       N0->getOperand(0), N1->getOperand(0));
   2830         AddToWorklist(NewNode.getNode());
   2831         return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
   2832                                     &SVN0->getMask()[0]);
   2833       }
   2834 
   2835       // Don't try to fold this node if it requires introducing a
   2836       // build vector of all zeros that might be illegal at this stage.
   2837       ShOp = N0->getOperand(0);
   2838       if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
   2839         if (!LegalTypes)
   2840           ShOp = DAG.getConstant(0, SDLoc(N), VT);
   2841         else
   2842           ShOp = SDValue();
   2843       }
   2844 
   2845       // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
   2846       // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
   2847       // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
   2848       if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
   2849         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
   2850                                       N0->getOperand(1), N1->getOperand(1));
   2851         AddToWorklist(NewNode.getNode());
   2852         return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
   2853                                     &SVN0->getMask()[0]);
   2854       }
   2855     }
   2856   }
   2857 
   2858   return SDValue();
   2859 }
   2860 
   2861 /// This contains all DAGCombine rules which reduce two values combined by
   2862 /// an And operation to a single value. This makes them reusable in the context
   2863 /// of visitSELECT(). Rules involving constants are not included as
   2864 /// visitSELECT() already handles those cases.
   2865 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
   2866                                   SDNode *LocReference) {
   2867   EVT VT = N1.getValueType();
   2868 
   2869   // fold (and x, undef) -> 0
   2870   if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
   2871     return DAG.getConstant(0, SDLoc(LocReference), VT);
   2872   // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
   2873   SDValue LL, LR, RL, RR, CC0, CC1;
   2874   if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
   2875     ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
   2876     ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
   2877 
   2878     if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
   2879         LL.getValueType().isInteger()) {
   2880       // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
   2881       if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
   2882         SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
   2883                                      LR.getValueType(), LL, RL);
   2884         AddToWorklist(ORNode.getNode());
   2885         return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
   2886       }
   2887       if (isAllOnesConstant(LR)) {
   2888         // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
   2889         if (Op1 == ISD::SETEQ) {
   2890           SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
   2891                                         LR.getValueType(), LL, RL);
   2892           AddToWorklist(ANDNode.getNode());
   2893           return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
   2894         }
   2895         // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
   2896         if (Op1 == ISD::SETGT) {
   2897           SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
   2898                                        LR.getValueType(), LL, RL);
   2899           AddToWorklist(ORNode.getNode());
   2900           return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
   2901         }
   2902       }
   2903     }
   2904     // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
   2905     if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
   2906         Op0 == Op1 && LL.getValueType().isInteger() &&
   2907       Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
   2908                             (isAllOnesConstant(LR) && isNullConstant(RR)))) {
   2909       SDLoc DL(N0);
   2910       SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
   2911                                     LL, DAG.getConstant(1, DL,
   2912                                                         LL.getValueType()));
   2913       AddToWorklist(ADDNode.getNode());
   2914       return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
   2915                           DAG.getConstant(2, DL, LL.getValueType()),
   2916                           ISD::SETUGE);
   2917     }
   2918     // canonicalize equivalent to ll == rl
   2919     if (LL == RR && LR == RL) {
   2920       Op1 = ISD::getSetCCSwappedOperands(Op1);
   2921       std::swap(RL, RR);
   2922     }
   2923     if (LL == RL && LR == RR) {
   2924       bool isInteger = LL.getValueType().isInteger();
   2925       ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
   2926       if (Result != ISD::SETCC_INVALID &&
   2927           (!LegalOperations ||
   2928            (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
   2929             TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
   2930         EVT CCVT = getSetCCResultType(LL.getValueType());
   2931         if (N0.getValueType() == CCVT ||
   2932             (!LegalOperations && N0.getValueType() == MVT::i1))
   2933           return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
   2934                               LL, LR, Result);
   2935       }
   2936     }
   2937   }
   2938 
   2939   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
   2940       VT.getSizeInBits() <= 64) {
   2941     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
   2942       APInt ADDC = ADDI->getAPIntValue();
   2943       if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
   2944         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
   2945         // immediate for an add, but it is legal if its top c2 bits are set,
   2946         // transform the ADD so the immediate doesn't need to be materialized
   2947         // in a register.
   2948         if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
   2949           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
   2950                                              SRLI->getZExtValue());
   2951           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
   2952             ADDC |= Mask;
   2953             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
   2954               SDLoc DL(N0);
   2955               SDValue NewAdd =
   2956                 DAG.getNode(ISD::ADD, DL, VT,
   2957                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
   2958               CombineTo(N0.getNode(), NewAdd);
   2959               // Return N so it doesn't get rechecked!
   2960               return SDValue(LocReference, 0);
   2961             }
   2962           }
   2963         }
   2964       }
   2965     }
   2966   }
   2967 
   2968   return SDValue();
   2969 }
   2970 
   2971 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
   2972                                    EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
   2973                                    bool &NarrowLoad) {
   2974   uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
   2975 
   2976   if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue()))
   2977     return false;
   2978 
   2979   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
   2980   LoadedVT = LoadN->getMemoryVT();
   2981 
   2982   if (ExtVT == LoadedVT &&
   2983       (!LegalOperations ||
   2984        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
   2985     // ZEXTLOAD will match without needing to change the size of the value being
   2986     // loaded.
   2987     NarrowLoad = false;
   2988     return true;
   2989   }
   2990 
   2991   // Do not change the width of a volatile load.
   2992   if (LoadN->isVolatile())
   2993     return false;
   2994 
   2995   // Do not generate loads of non-round integer types since these can
   2996   // be expensive (and would be wrong if the type is not byte sized).
   2997   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
   2998     return false;
   2999 
   3000   if (LegalOperations &&
   3001       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
   3002     return false;
   3003 
   3004   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
   3005     return false;
   3006 
   3007   NarrowLoad = true;
   3008   return true;
   3009 }
   3010 
   3011 SDValue DAGCombiner::visitAND(SDNode *N) {
   3012   SDValue N0 = N->getOperand(0);
   3013   SDValue N1 = N->getOperand(1);
   3014   EVT VT = N1.getValueType();
   3015 
   3016   // fold vector ops
   3017   if (VT.isVector()) {
   3018     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   3019       return FoldedVOp;
   3020 
   3021     // fold (and x, 0) -> 0, vector edition
   3022     if (ISD::isBuildVectorAllZeros(N0.getNode()))
   3023       // do not return N0, because undef node may exist in N0
   3024       return DAG.getConstant(
   3025           APInt::getNullValue(
   3026               N0.getValueType().getScalarType().getSizeInBits()),
   3027           SDLoc(N), N0.getValueType());
   3028     if (ISD::isBuildVectorAllZeros(N1.getNode()))
   3029       // do not return N1, because undef node may exist in N1
   3030       return DAG.getConstant(
   3031           APInt::getNullValue(
   3032               N1.getValueType().getScalarType().getSizeInBits()),
   3033           SDLoc(N), N1.getValueType());
   3034 
   3035     // fold (and x, -1) -> x, vector edition
   3036     if (ISD::isBuildVectorAllOnes(N0.getNode()))
   3037       return N1;
   3038     if (ISD::isBuildVectorAllOnes(N1.getNode()))
   3039       return N0;
   3040   }
   3041 
   3042   // fold (and c1, c2) -> c1&c2
   3043   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   3044   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   3045   if (N0C && N1C && !N1C->isOpaque())
   3046     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
   3047   // canonicalize constant to RHS
   3048   if (isConstantIntBuildVectorOrConstantInt(N0) &&
   3049      !isConstantIntBuildVectorOrConstantInt(N1))
   3050     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
   3051   // fold (and x, -1) -> x
   3052   if (isAllOnesConstant(N1))
   3053     return N0;
   3054   // if (and x, c) is known to be zero, return 0
   3055   unsigned BitWidth = VT.getScalarType().getSizeInBits();
   3056   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
   3057                                    APInt::getAllOnesValue(BitWidth)))
   3058     return DAG.getConstant(0, SDLoc(N), VT);
   3059   // reassociate and
   3060   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
   3061     return RAND;
   3062   // fold (and (or x, C), D) -> D if (C & D) == D
   3063   if (N1C && N0.getOpcode() == ISD::OR)
   3064     if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
   3065       if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
   3066         return N1;
   3067   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
   3068   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
   3069     SDValue N0Op0 = N0.getOperand(0);
   3070     APInt Mask = ~N1C->getAPIntValue();
   3071     Mask = Mask.trunc(N0Op0.getValueSizeInBits());
   3072     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
   3073       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
   3074                                  N0.getValueType(), N0Op0);
   3075 
   3076       // Replace uses of the AND with uses of the Zero extend node.
   3077       CombineTo(N, Zext);
   3078 
   3079       // We actually want to replace all uses of the any_extend with the
   3080       // zero_extend, to avoid duplicating things.  This will later cause this
   3081       // AND to be folded.
   3082       CombineTo(N0.getNode(), Zext);
   3083       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   3084     }
   3085   }
   3086   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
   3087   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
   3088   // already be zero by virtue of the width of the base type of the load.
   3089   //
   3090   // the 'X' node here can either be nothing or an extract_vector_elt to catch
   3091   // more cases.
   3092   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
   3093        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
   3094        N0.getOperand(0).getOpcode() == ISD::LOAD) ||
   3095       N0.getOpcode() == ISD::LOAD) {
   3096     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
   3097                                          N0 : N0.getOperand(0) );
   3098 
   3099     // Get the constant (if applicable) the zero'th operand is being ANDed with.
   3100     // This can be a pure constant or a vector splat, in which case we treat the
   3101     // vector as a scalar and use the splat value.
   3102     APInt Constant = APInt::getNullValue(1);
   3103     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
   3104       Constant = C->getAPIntValue();
   3105     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
   3106       APInt SplatValue, SplatUndef;
   3107       unsigned SplatBitSize;
   3108       bool HasAnyUndefs;
   3109       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
   3110                                              SplatBitSize, HasAnyUndefs);
   3111       if (IsSplat) {
   3112         // Undef bits can contribute to a possible optimisation if set, so
   3113         // set them.
   3114         SplatValue |= SplatUndef;
   3115 
   3116         // The splat value may be something like "0x00FFFFFF", which means 0 for
   3117         // the first vector value and FF for the rest, repeating. We need a mask
   3118         // that will apply equally to all members of the vector, so AND all the
   3119         // lanes of the constant together.
   3120         EVT VT = Vector->getValueType(0);
   3121         unsigned BitWidth = VT.getVectorElementType().getSizeInBits();
   3122 
   3123         // If the splat value has been compressed to a bitlength lower
   3124         // than the size of the vector lane, we need to re-expand it to
   3125         // the lane size.
   3126         if (BitWidth > SplatBitSize)
   3127           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
   3128                SplatBitSize < BitWidth;
   3129                SplatBitSize = SplatBitSize * 2)
   3130             SplatValue |= SplatValue.shl(SplatBitSize);
   3131 
   3132         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
   3133         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
   3134         if (SplatBitSize % BitWidth == 0) {
   3135           Constant = APInt::getAllOnesValue(BitWidth);
   3136           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
   3137             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
   3138         }
   3139       }
   3140     }
   3141 
   3142     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
   3143     // actually legal and isn't going to get expanded, else this is a false
   3144     // optimisation.
   3145     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
   3146                                                     Load->getValueType(0),
   3147                                                     Load->getMemoryVT());
   3148 
   3149     // Resize the constant to the same size as the original memory access before
   3150     // extension. If it is still the AllOnesValue then this AND is completely
   3151     // unneeded.
   3152     Constant =
   3153       Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());
   3154 
   3155     bool B;
   3156     switch (Load->getExtensionType()) {
   3157     default: B = false; break;
   3158     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
   3159     case ISD::ZEXTLOAD:
   3160     case ISD::NON_EXTLOAD: B = true; break;
   3161     }
   3162 
   3163     if (B && Constant.isAllOnesValue()) {
   3164       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
   3165       // preserve semantics once we get rid of the AND.
   3166       SDValue NewLoad(Load, 0);
   3167       if (Load->getExtensionType() == ISD::EXTLOAD) {
   3168         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
   3169                               Load->getValueType(0), SDLoc(Load),
   3170                               Load->getChain(), Load->getBasePtr(),
   3171                               Load->getOffset(), Load->getMemoryVT(),
   3172                               Load->getMemOperand());
   3173         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
   3174         if (Load->getNumValues() == 3) {
   3175           // PRE/POST_INC loads have 3 values.
   3176           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
   3177                            NewLoad.getValue(2) };
   3178           CombineTo(Load, To, 3, true);
   3179         } else {
   3180           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
   3181         }
   3182       }
   3183 
   3184       // Fold the AND away, taking care not to fold to the old load node if we
   3185       // replaced it.
   3186       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
   3187 
   3188       return SDValue(N, 0); // Return N so it doesn't get rechecked!
   3189     }
   3190   }
   3191 
   3192   // fold (and (load x), 255) -> (zextload x, i8)
   3193   // fold (and (extload x, i16), 255) -> (zextload x, i8)
   3194   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
   3195   if (N1C && (N0.getOpcode() == ISD::LOAD ||
   3196               (N0.getOpcode() == ISD::ANY_EXTEND &&
   3197                N0.getOperand(0).getOpcode() == ISD::LOAD))) {
   3198     bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
   3199     LoadSDNode *LN0 = HasAnyExt
   3200       ? cast<LoadSDNode>(N0.getOperand(0))
   3201       : cast<LoadSDNode>(N0);
   3202     if (LN0->getExtensionType() != ISD::SEXTLOAD &&
   3203         LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
   3204       auto NarrowLoad = false;
   3205       EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
   3206       EVT ExtVT, LoadedVT;
   3207       if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
   3208                            NarrowLoad)) {
   3209         if (!NarrowLoad) {
   3210           SDValue NewLoad =
   3211             DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
   3212                            LN0->getChain(), LN0->getBasePtr(), ExtVT,
   3213                            LN0->getMemOperand());
   3214           AddToWorklist(N);
   3215           CombineTo(LN0, NewLoad, NewLoad.getValue(1));
   3216           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   3217         } else {
   3218           EVT PtrType = LN0->getOperand(1).getValueType();
   3219 
   3220           unsigned Alignment = LN0->getAlignment();
   3221           SDValue NewPtr = LN0->getBasePtr();
   3222 
   3223           // For big endian targets, we need to add an offset to the pointer
   3224           // to load the correct bytes.  For little endian systems, we merely
   3225           // need to read fewer bytes from the same pointer.
   3226           if (DAG.getDataLayout().isBigEndian()) {
   3227             unsigned LVTStoreBytes = LoadedVT.getStoreSize();
   3228             unsigned EVTStoreBytes = ExtVT.getStoreSize();
   3229             unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
   3230             SDLoc DL(LN0);
   3231             NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
   3232                                  NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
   3233             Alignment = MinAlign(Alignment, PtrOff);
   3234           }
   3235 
   3236           AddToWorklist(NewPtr.getNode());
   3237 
   3238           SDValue Load =
   3239             DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
   3240                            LN0->getChain(), NewPtr,
   3241                            LN0->getPointerInfo(),
   3242                            ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
   3243                            LN0->isInvariant(), Alignment, LN0->getAAInfo());
   3244           AddToWorklist(N);
   3245           CombineTo(LN0, Load, Load.getValue(1));
   3246           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   3247         }
   3248       }
   3249     }
   3250   }
   3251 
   3252   if (SDValue Combined = visitANDLike(N0, N1, N))
   3253     return Combined;
   3254 
   3255   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
   3256   if (N0.getOpcode() == N1.getOpcode())
   3257     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
   3258       return Tmp;
   3259 
   3260   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
   3261   // fold (and (sra)) -> (and (srl)) when possible.
   3262   if (!VT.isVector() &&
   3263       SimplifyDemandedBits(SDValue(N, 0)))
   3264     return SDValue(N, 0);
   3265 
   3266   // fold (zext_inreg (extload x)) -> (zextload x)
   3267   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
   3268     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   3269     EVT MemVT = LN0->getMemoryVT();
   3270     // If we zero all the possible extended bits, then we can turn this into
   3271     // a zextload if we are running before legalize or the operation is legal.
   3272     unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
   3273     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
   3274                            BitWidth - MemVT.getScalarType().getSizeInBits())) &&
   3275         ((!LegalOperations && !LN0->isVolatile()) ||
   3276          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
   3277       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
   3278                                        LN0->getChain(), LN0->getBasePtr(),
   3279                                        MemVT, LN0->getMemOperand());
   3280       AddToWorklist(N);
   3281       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
   3282       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   3283     }
   3284   }
   3285   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
   3286   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
   3287       N0.hasOneUse()) {
   3288     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   3289     EVT MemVT = LN0->getMemoryVT();
   3290     // If we zero all the possible extended bits, then we can turn this into
   3291     // a zextload if we are running before legalize or the operation is legal.
   3292     unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
   3293     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
   3294                            BitWidth - MemVT.getScalarType().getSizeInBits())) &&
   3295         ((!LegalOperations && !LN0->isVolatile()) ||
   3296          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
   3297       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
   3298                                        LN0->getChain(), LN0->getBasePtr(),
   3299                                        MemVT, LN0->getMemOperand());
   3300       AddToWorklist(N);
   3301       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
   3302       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   3303     }
   3304   }
   3305   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
   3306   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
   3307     SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
   3308                                        N0.getOperand(1), false);
   3309     if (BSwap.getNode())
   3310       return BSwap;
   3311   }
   3312 
   3313   return SDValue();
   3314 }
   3315 
   3316 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
   3317 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
   3318                                         bool DemandHighBits) {
   3319   if (!LegalOperations)
   3320     return SDValue();
   3321 
   3322   EVT VT = N->getValueType(0);
   3323   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
   3324     return SDValue();
   3325   if (!TLI.isOperationLegal(ISD::BSWAP, VT))
   3326     return SDValue();
   3327 
   3328   // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
   3329   bool LookPassAnd0 = false;
   3330   bool LookPassAnd1 = false;
   3331   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
   3332       std::swap(N0, N1);
   3333   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
   3334       std::swap(N0, N1);
   3335   if (N0.getOpcode() == ISD::AND) {
   3336     if (!N0.getNode()->hasOneUse())
   3337       return SDValue();
   3338     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   3339     if (!N01C || N01C->getZExtValue() != 0xFF00)
   3340       return SDValue();
   3341     N0 = N0.getOperand(0);
   3342     LookPassAnd0 = true;
   3343   }
   3344 
   3345   if (N1.getOpcode() == ISD::AND) {
   3346     if (!N1.getNode()->hasOneUse())
   3347       return SDValue();
   3348     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
   3349     if (!N11C || N11C->getZExtValue() != 0xFF)
   3350       return SDValue();
   3351     N1 = N1.getOperand(0);
   3352     LookPassAnd1 = true;
   3353   }
   3354 
   3355   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
   3356     std::swap(N0, N1);
   3357   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
   3358     return SDValue();
   3359   if (!N0.getNode()->hasOneUse() ||
   3360       !N1.getNode()->hasOneUse())
   3361     return SDValue();
   3362 
   3363   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   3364   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
   3365   if (!N01C || !N11C)
   3366     return SDValue();
   3367   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
   3368     return SDValue();
   3369 
   3370   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
   3371   SDValue N00 = N0->getOperand(0);
   3372   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
   3373     if (!N00.getNode()->hasOneUse())
   3374       return SDValue();
   3375     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
   3376     if (!N001C || N001C->getZExtValue() != 0xFF)
   3377       return SDValue();
   3378     N00 = N00.getOperand(0);
   3379     LookPassAnd0 = true;
   3380   }
   3381 
   3382   SDValue N10 = N1->getOperand(0);
   3383   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
   3384     if (!N10.getNode()->hasOneUse())
   3385       return SDValue();
   3386     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
   3387     if (!N101C || N101C->getZExtValue() != 0xFF00)
   3388       return SDValue();
   3389     N10 = N10.getOperand(0);
   3390     LookPassAnd1 = true;
   3391   }
   3392 
   3393   if (N00 != N10)
   3394     return SDValue();
   3395 
   3396   // Make sure everything beyond the low halfword gets set to zero since the SRL
   3397   // 16 will clear the top bits.
   3398   unsigned OpSizeInBits = VT.getSizeInBits();
   3399   if (DemandHighBits && OpSizeInBits > 16) {
   3400     // If the left-shift isn't masked out then the only way this is a bswap is
   3401     // if all bits beyond the low 8 are 0. In that case the entire pattern
   3402     // reduces to a left shift anyway: leave it for other parts of the combiner.
   3403     if (!LookPassAnd0)
   3404       return SDValue();
   3405 
   3406     // However, if the right shift isn't masked out then it might be because
   3407     // it's not needed. See if we can spot that too.
   3408     if (!LookPassAnd1 &&
   3409         !DAG.MaskedValueIsZero(
   3410             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
   3411       return SDValue();
   3412   }
   3413 
   3414   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
   3415   if (OpSizeInBits > 16) {
   3416     SDLoc DL(N);
   3417     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
   3418                       DAG.getConstant(OpSizeInBits - 16, DL,
   3419                                       getShiftAmountTy(VT)));
   3420   }
   3421   return Res;
   3422 }
   3423 
   3424 /// Return true if the specified node is an element that makes up a 32-bit
   3425 /// packed halfword byteswap.
   3426 /// ((x & 0x000000ff) << 8) |
   3427 /// ((x & 0x0000ff00) >> 8) |
   3428 /// ((x & 0x00ff0000) << 8) |
   3429 /// ((x & 0xff000000) >> 8)
   3430 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
   3431   if (!N.getNode()->hasOneUse())
   3432     return false;
   3433 
   3434   unsigned Opc = N.getOpcode();
   3435   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
   3436     return false;
   3437 
   3438   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
   3439   if (!N1C)
   3440     return false;
   3441 
   3442   unsigned Num;
   3443   switch (N1C->getZExtValue()) {
   3444   default:
   3445     return false;
   3446   case 0xFF:       Num = 0; break;
   3447   case 0xFF00:     Num = 1; break;
   3448   case 0xFF0000:   Num = 2; break;
   3449   case 0xFF000000: Num = 3; break;
   3450   }
   3451 
   3452   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
   3453   SDValue N0 = N.getOperand(0);
   3454   if (Opc == ISD::AND) {
   3455     if (Num == 0 || Num == 2) {
   3456       // (x >> 8) & 0xff
   3457       // (x >> 8) & 0xff0000
   3458       if (N0.getOpcode() != ISD::SRL)
   3459         return false;
   3460       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   3461       if (!C || C->getZExtValue() != 8)
   3462         return false;
   3463     } else {
   3464       // (x << 8) & 0xff00
   3465       // (x << 8) & 0xff000000
   3466       if (N0.getOpcode() != ISD::SHL)
   3467         return false;
   3468       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   3469       if (!C || C->getZExtValue() != 8)
   3470         return false;
   3471     }
   3472   } else if (Opc == ISD::SHL) {
   3473     // (x & 0xff) << 8
   3474     // (x & 0xff0000) << 8
   3475     if (Num != 0 && Num != 2)
   3476       return false;
   3477     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
   3478     if (!C || C->getZExtValue() != 8)
   3479       return false;
   3480   } else { // Opc == ISD::SRL
   3481     // (x & 0xff00) >> 8
   3482     // (x & 0xff000000) >> 8
   3483     if (Num != 1 && Num != 3)
   3484       return false;
   3485     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
   3486     if (!C || C->getZExtValue() != 8)
   3487       return false;
   3488   }
   3489 
   3490   if (Parts[Num])
   3491     return false;
   3492 
   3493   Parts[Num] = N0.getOperand(0).getNode();
   3494   return true;
   3495 }
   3496 
   3497 /// Match a 32-bit packed halfword bswap. That is
   3498 /// ((x & 0x000000ff) << 8) |
   3499 /// ((x & 0x0000ff00) >> 8) |
   3500 /// ((x & 0x00ff0000) << 8) |
   3501 /// ((x & 0xff000000) >> 8)
   3502 /// => (rotl (bswap x), 16)
   3503 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
   3504   if (!LegalOperations)
   3505     return SDValue();
   3506 
   3507   EVT VT = N->getValueType(0);
   3508   if (VT != MVT::i32)
   3509     return SDValue();
   3510   if (!TLI.isOperationLegal(ISD::BSWAP, VT))
   3511     return SDValue();
   3512 
   3513   // Look for either
   3514   // (or (or (and), (and)), (or (and), (and)))
   3515   // (or (or (or (and), (and)), (and)), (and))
   3516   if (N0.getOpcode() != ISD::OR)
   3517     return SDValue();
   3518   SDValue N00 = N0.getOperand(0);
   3519   SDValue N01 = N0.getOperand(1);
   3520   SDNode *Parts[4] = {};
   3521 
   3522   if (N1.getOpcode() == ISD::OR &&
   3523       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
   3524     // (or (or (and), (and)), (or (and), (and)))
   3525     SDValue N000 = N00.getOperand(0);
   3526     if (!isBSwapHWordElement(N000, Parts))
   3527       return SDValue();
   3528 
   3529     SDValue N001 = N00.getOperand(1);
   3530     if (!isBSwapHWordElement(N001, Parts))
   3531       return SDValue();
   3532     SDValue N010 = N01.getOperand(0);
   3533     if (!isBSwapHWordElement(N010, Parts))
   3534       return SDValue();
   3535     SDValue N011 = N01.getOperand(1);
   3536     if (!isBSwapHWordElement(N011, Parts))
   3537       return SDValue();
   3538   } else {
   3539     // (or (or (or (and), (and)), (and)), (and))
   3540     if (!isBSwapHWordElement(N1, Parts))
   3541       return SDValue();
   3542     if (!isBSwapHWordElement(N01, Parts))
   3543       return SDValue();
   3544     if (N00.getOpcode() != ISD::OR)
   3545       return SDValue();
   3546     SDValue N000 = N00.getOperand(0);
   3547     if (!isBSwapHWordElement(N000, Parts))
   3548       return SDValue();
   3549     SDValue N001 = N00.getOperand(1);
   3550     if (!isBSwapHWordElement(N001, Parts))
   3551       return SDValue();
   3552   }
   3553 
   3554   // Make sure the parts are all coming from the same node.
   3555   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
   3556     return SDValue();
   3557 
   3558   SDLoc DL(N);
   3559   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
   3560                               SDValue(Parts[0], 0));
   3561 
   3562   // Result of the bswap should be rotated by 16. If it's not legal, then
   3563   // do  (x << 16) | (x >> 16).
   3564   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
   3565   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
   3566     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
   3567   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
   3568     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
   3569   return DAG.getNode(ISD::OR, DL, VT,
   3570                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
   3571                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
   3572 }
   3573 
   3574 /// This contains all DAGCombine rules which reduce two values combined by
   3575 /// an Or operation to a single value \see visitANDLike().
   3576 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
   3577   EVT VT = N1.getValueType();
   3578   // fold (or x, undef) -> -1
   3579   if (!LegalOperations &&
   3580       (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
   3581     EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
   3582     return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
   3583                            SDLoc(LocReference), VT);
   3584   }
   3585   // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
   3586   SDValue LL, LR, RL, RR, CC0, CC1;
   3587   if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
   3588     ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
   3589     ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
   3590 
   3591     if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) {
   3592       // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
   3593       // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
   3594       if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
   3595         SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
   3596                                      LR.getValueType(), LL, RL);
   3597         AddToWorklist(ORNode.getNode());
   3598         return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
   3599       }
   3600       // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
   3601       // fold (or (setgt X, -1), (setgt Y  -1)) -> (setgt (and X, Y), -1)
   3602       if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
   3603         SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
   3604                                       LR.getValueType(), LL, RL);
   3605         AddToWorklist(ANDNode.getNode());
   3606         return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
   3607       }
   3608     }
   3609     // canonicalize equivalent to ll == rl
   3610     if (LL == RR && LR == RL) {
   3611       Op1 = ISD::getSetCCSwappedOperands(Op1);
   3612       std::swap(RL, RR);
   3613     }
   3614     if (LL == RL && LR == RR) {
   3615       bool isInteger = LL.getValueType().isInteger();
   3616       ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
   3617       if (Result != ISD::SETCC_INVALID &&
   3618           (!LegalOperations ||
   3619            (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
   3620             TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
   3621         EVT CCVT = getSetCCResultType(LL.getValueType());
   3622         if (N0.getValueType() == CCVT ||
   3623             (!LegalOperations && N0.getValueType() == MVT::i1))
   3624           return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
   3625                               LL, LR, Result);
   3626       }
   3627     }
   3628   }
   3629 
   3630   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
   3631   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
   3632       // Don't increase # computations.
   3633       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
   3634     // We can only do this xform if we know that bits from X that are set in C2
   3635     // but not in C1 are already zero.  Likewise for Y.
   3636     if (const ConstantSDNode *N0O1C =
   3637         getAsNonOpaqueConstant(N0.getOperand(1))) {
   3638       if (const ConstantSDNode *N1O1C =
   3639           getAsNonOpaqueConstant(N1.getOperand(1))) {
   3640         // We can only do this xform if we know that bits from X that are set in
   3641         // C2 but not in C1 are already zero.  Likewise for Y.
   3642         const APInt &LHSMask = N0O1C->getAPIntValue();
   3643         const APInt &RHSMask = N1O1C->getAPIntValue();
   3644 
   3645         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
   3646             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
   3647           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
   3648                                   N0.getOperand(0), N1.getOperand(0));
   3649           SDLoc DL(LocReference);
   3650           return DAG.getNode(ISD::AND, DL, VT, X,
   3651                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
   3652         }
   3653       }
   3654     }
   3655   }
   3656 
   3657   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
   3658   if (N0.getOpcode() == ISD::AND &&
   3659       N1.getOpcode() == ISD::AND &&
   3660       N0.getOperand(0) == N1.getOperand(0) &&
   3661       // Don't increase # computations.
   3662       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
   3663     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
   3664                             N0.getOperand(1), N1.getOperand(1));
   3665     return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X);
   3666   }
   3667 
   3668   return SDValue();
   3669 }
   3670 
   3671 SDValue DAGCombiner::visitOR(SDNode *N) {
   3672   SDValue N0 = N->getOperand(0);
   3673   SDValue N1 = N->getOperand(1);
   3674   EVT VT = N1.getValueType();
   3675 
   3676   // fold vector ops
   3677   if (VT.isVector()) {
   3678     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   3679       return FoldedVOp;
   3680 
   3681     // fold (or x, 0) -> x, vector edition
   3682     if (ISD::isBuildVectorAllZeros(N0.getNode()))
   3683       return N1;
   3684     if (ISD::isBuildVectorAllZeros(N1.getNode()))
   3685       return N0;
   3686 
   3687     // fold (or x, -1) -> -1, vector edition
   3688     if (ISD::isBuildVectorAllOnes(N0.getNode()))
   3689       // do not return N0, because undef node may exist in N0
   3690       return DAG.getConstant(
   3691           APInt::getAllOnesValue(
   3692               N0.getValueType().getScalarType().getSizeInBits()),
   3693           SDLoc(N), N0.getValueType());
   3694     if (ISD::isBuildVectorAllOnes(N1.getNode()))
   3695       // do not return N1, because undef node may exist in N1
   3696       return DAG.getConstant(
   3697           APInt::getAllOnesValue(
   3698               N1.getValueType().getScalarType().getSizeInBits()),
   3699           SDLoc(N), N1.getValueType());
   3700 
   3701     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
   3702     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
   3703     // Do this only if the resulting shuffle is legal.
   3704     if (isa<ShuffleVectorSDNode>(N0) &&
   3705         isa<ShuffleVectorSDNode>(N1) &&
   3706         // Avoid folding a node with illegal type.
   3707         TLI.isTypeLegal(VT) &&
   3708         N0->getOperand(1) == N1->getOperand(1) &&
   3709         ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) {
   3710       bool CanFold = true;
   3711       unsigned NumElts = VT.getVectorNumElements();
   3712       const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
   3713       const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
   3714       // We construct two shuffle masks:
   3715       // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand
   3716       // and N1 as the second operand.
   3717       // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand
   3718       // and N0 as the second operand.
   3719       // We do this because OR is commutable and therefore there might be
   3720       // two ways to fold this node into a shuffle.
   3721       SmallVector<int,4> Mask1;
   3722       SmallVector<int,4> Mask2;
   3723 
   3724       for (unsigned i = 0; i != NumElts && CanFold; ++i) {
   3725         int M0 = SV0->getMaskElt(i);
   3726         int M1 = SV1->getMaskElt(i);
   3727 
   3728         // Both shuffle indexes are undef. Propagate Undef.
   3729         if (M0 < 0 && M1 < 0) {
   3730           Mask1.push_back(M0);
   3731           Mask2.push_back(M0);
   3732           continue;
   3733         }
   3734 
   3735         if (M0 < 0 || M1 < 0 ||
   3736             (M0 < (int)NumElts && M1 < (int)NumElts) ||
   3737             (M0 >= (int)NumElts && M1 >= (int)NumElts)) {
   3738           CanFold = false;
   3739           break;
   3740         }
   3741 
   3742         Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts);
   3743         Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts);
   3744       }
   3745 
   3746       if (CanFold) {
   3747         // Fold this sequence only if the resulting shuffle is 'legal'.
   3748         if (TLI.isShuffleMaskLegal(Mask1, VT))
   3749           return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0),
   3750                                       N1->getOperand(0), &Mask1[0]);
   3751         if (TLI.isShuffleMaskLegal(Mask2, VT))
   3752           return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0),
   3753                                       N0->getOperand(0), &Mask2[0]);
   3754       }
   3755     }
   3756   }
   3757 
   3758   // fold (or c1, c2) -> c1|c2
   3759   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   3760   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   3761   if (N0C && N1C && !N1C->isOpaque())
   3762     return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
   3763   // canonicalize constant to RHS
   3764   if (isConstantIntBuildVectorOrConstantInt(N0) &&
   3765      !isConstantIntBuildVectorOrConstantInt(N1))
   3766     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
   3767   // fold (or x, 0) -> x
   3768   if (isNullConstant(N1))
   3769     return N0;
   3770   // fold (or x, -1) -> -1
   3771   if (isAllOnesConstant(N1))
   3772     return N1;
   3773   // fold (or x, c) -> c iff (x & ~c) == 0
   3774   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
   3775     return N1;
   3776 
   3777   if (SDValue Combined = visitORLike(N0, N1, N))
   3778     return Combined;
   3779 
   3780   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
   3781   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
   3782     return BSwap;
   3783   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
   3784     return BSwap;
   3785 
   3786   // reassociate or
   3787   if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
   3788     return ROR;
   3789   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
   3790   // iff (c1 & c2) == 0.
   3791   if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
   3792              isa<ConstantSDNode>(N0.getOperand(1))) {
   3793     ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
   3794     if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
   3795       if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
   3796                                                    N1C, C1))
   3797         return DAG.getNode(
   3798             ISD::AND, SDLoc(N), VT,
   3799             DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
   3800       return SDValue();
   3801     }
   3802   }
   3803   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
   3804   if (N0.getOpcode() == N1.getOpcode())
   3805     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
   3806       return Tmp;
   3807 
   3808   // See if this is some rotate idiom.
   3809   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
   3810     return SDValue(Rot, 0);
   3811 
   3812   // Simplify the operands using demanded-bits information.
   3813   if (!VT.isVector() &&
   3814       SimplifyDemandedBits(SDValue(N, 0)))
   3815     return SDValue(N, 0);
   3816 
   3817   return SDValue();
   3818 }
   3819 
   3820 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
   3821 static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
   3822   if (Op.getOpcode() == ISD::AND) {
   3823     if (isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
   3824       Mask = Op.getOperand(1);
   3825       Op = Op.getOperand(0);
   3826     } else {
   3827       return false;
   3828     }
   3829   }
   3830 
   3831   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
   3832     Shift = Op;
   3833     return true;
   3834   }
   3835 
   3836   return false;
   3837 }
   3838 
   3839 // Return true if we can prove that, whenever Neg and Pos are both in the
   3840 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
   3841 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
   3842 //
   3843 //     (or (shift1 X, Neg), (shift2 X, Pos))
   3844 //
   3845 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
   3846 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
   3847 // to consider shift amounts with defined behavior.
   3848 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
   3849   // If EltSize is a power of 2 then:
   3850   //
   3851   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
   3852   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
   3853   //
   3854   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
   3855   // for the stronger condition:
   3856   //
   3857   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
   3858   //
   3859   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
   3860   // we can just replace Neg with Neg' for the rest of the function.
   3861   //
   3862   // In other cases we check for the even stronger condition:
   3863   //
   3864   //     Neg == EltSize - Pos                                    [B]
   3865   //
   3866   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
   3867   // behavior if Pos == 0 (and consequently Neg == EltSize).
   3868   //
   3869   // We could actually use [A] whenever EltSize is a power of 2, but the
   3870   // only extra cases that it would match are those uninteresting ones
   3871   // where Neg and Pos are never in range at the same time.  E.g. for
   3872   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
   3873   // as well as (sub 32, Pos), but:
   3874   //
   3875   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
   3876   //
   3877   // always invokes undefined behavior for 32-bit X.
   3878   //
   3879   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
   3880   unsigned MaskLoBits = 0;
   3881   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
   3882     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
   3883       if (NegC->getAPIntValue() == EltSize - 1) {
   3884         Neg = Neg.getOperand(0);
   3885         MaskLoBits = Log2_64(EltSize);
   3886       }
   3887     }
   3888   }
   3889 
   3890   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
   3891   if (Neg.getOpcode() != ISD::SUB)
   3892     return false;
   3893   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
   3894   if (!NegC)
   3895     return false;
   3896   SDValue NegOp1 = Neg.getOperand(1);
   3897 
   3898   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
   3899   // Pos'.  The truncation is redundant for the purpose of the equality.
   3900   if (MaskLoBits && Pos.getOpcode() == ISD::AND)
   3901     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
   3902       if (PosC->getAPIntValue() == EltSize - 1)
   3903         Pos = Pos.getOperand(0);
   3904 
   3905   // The condition we need is now:
   3906   //
   3907   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
   3908   //
   3909   // If NegOp1 == Pos then we need:
   3910   //
   3911   //              EltSize & Mask == NegC & Mask
   3912   //
   3913   // (because "x & Mask" is a truncation and distributes through subtraction).
   3914   APInt Width;
   3915   if (Pos == NegOp1)
   3916     Width = NegC->getAPIntValue();
   3917 
   3918   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
   3919   // Then the condition we want to prove becomes:
   3920   //
   3921   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
   3922   //
   3923   // which, again because "x & Mask" is a truncation, becomes:
   3924   //
   3925   //                NegC & Mask == (EltSize - PosC) & Mask
   3926   //             EltSize & Mask == (NegC + PosC) & Mask
   3927   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
   3928     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
   3929       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
   3930     else
   3931       return false;
   3932   } else
   3933     return false;
   3934 
   3935   // Now we just need to check that EltSize & Mask == Width & Mask.
   3936   if (MaskLoBits)
   3937     // EltSize & Mask is 0 since Mask is EltSize - 1.
   3938     return Width.getLoBits(MaskLoBits) == 0;
   3939   return Width == EltSize;
   3940 }
   3941 
   3942 // A subroutine of MatchRotate used once we have found an OR of two opposite
   3943 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
   3944 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
   3945 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
   3946 // Neg with outer conversions stripped away.
   3947 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
   3948                                        SDValue Neg, SDValue InnerPos,
   3949                                        SDValue InnerNeg, unsigned PosOpcode,
   3950                                        unsigned NegOpcode, SDLoc DL) {
   3951   // fold (or (shl x, (*ext y)),
   3952   //          (srl x, (*ext (sub 32, y)))) ->
   3953   //   (rotl x, y) or (rotr x, (sub 32, y))
   3954   //
   3955   // fold (or (shl x, (*ext (sub 32, y))),
   3956   //          (srl x, (*ext y))) ->
   3957   //   (rotr x, y) or (rotl x, (sub 32, y))
   3958   EVT VT = Shifted.getValueType();
   3959   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
   3960     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
   3961     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
   3962                        HasPos ? Pos : Neg).getNode();
   3963   }
   3964 
   3965   return nullptr;
   3966 }
   3967 
   3968 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
   3969 // idioms for rotate, and if the target supports rotation instructions, generate
   3970 // a rot[lr].
   3971 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
   3972   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
   3973   EVT VT = LHS.getValueType();
   3974   if (!TLI.isTypeLegal(VT)) return nullptr;
   3975 
   3976   // The target must have at least one rotate flavor.
   3977   bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
   3978   bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
   3979   if (!HasROTL && !HasROTR) return nullptr;
   3980 
   3981   // Match "(X shl/srl V1) & V2" where V2 may not be present.
   3982   SDValue LHSShift;   // The shift.
   3983   SDValue LHSMask;    // AND value if any.
   3984   if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
   3985     return nullptr; // Not part of a rotate.
   3986 
   3987   SDValue RHSShift;   // The shift.
   3988   SDValue RHSMask;    // AND value if any.
   3989   if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
   3990     return nullptr; // Not part of a rotate.
   3991 
   3992   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
   3993     return nullptr;   // Not shifting the same value.
   3994 
   3995   if (LHSShift.getOpcode() == RHSShift.getOpcode())
   3996     return nullptr;   // Shifts must disagree.
   3997 
   3998   // Canonicalize shl to left side in a shl/srl pair.
   3999   if (RHSShift.getOpcode() == ISD::SHL) {
   4000     std::swap(LHS, RHS);
   4001     std::swap(LHSShift, RHSShift);
   4002     std::swap(LHSMask, RHSMask);
   4003   }
   4004 
   4005   unsigned EltSizeInBits = VT.getScalarSizeInBits();
   4006   SDValue LHSShiftArg = LHSShift.getOperand(0);
   4007   SDValue LHSShiftAmt = LHSShift.getOperand(1);
   4008   SDValue RHSShiftArg = RHSShift.getOperand(0);
   4009   SDValue RHSShiftAmt = RHSShift.getOperand(1);
   4010 
   4011   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
   4012   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
   4013   if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) {
   4014     uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue();
   4015     uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue();
   4016     if ((LShVal + RShVal) != EltSizeInBits)
   4017       return nullptr;
   4018 
   4019     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
   4020                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
   4021 
   4022     // If there is an AND of either shifted operand, apply it to the result.
   4023     if (LHSMask.getNode() || RHSMask.getNode()) {
   4024       APInt AllBits = APInt::getAllOnesValue(EltSizeInBits);
   4025       SDValue Mask = DAG.getConstant(AllBits, DL, VT);
   4026 
   4027       if (LHSMask.getNode()) {
   4028         APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
   4029         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
   4030                            DAG.getNode(ISD::OR, DL, VT, LHSMask,
   4031                                        DAG.getConstant(RHSBits, DL, VT)));
   4032       }
   4033       if (RHSMask.getNode()) {
   4034         APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
   4035         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
   4036                            DAG.getNode(ISD::OR, DL, VT, RHSMask,
   4037                                        DAG.getConstant(LHSBits, DL, VT)));
   4038       }
   4039 
   4040       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
   4041     }
   4042 
   4043     return Rot.getNode();
   4044   }
   4045 
   4046   // If there is a mask here, and we have a variable shift, we can't be sure
   4047   // that we're masking out the right stuff.
   4048   if (LHSMask.getNode() || RHSMask.getNode())
   4049     return nullptr;
   4050 
   4051   // If the shift amount is sign/zext/any-extended just peel it off.
   4052   SDValue LExtOp0 = LHSShiftAmt;
   4053   SDValue RExtOp0 = RHSShiftAmt;
   4054   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
   4055        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
   4056        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
   4057        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
   4058       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
   4059        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
   4060        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
   4061        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
   4062     LExtOp0 = LHSShiftAmt.getOperand(0);
   4063     RExtOp0 = RHSShiftAmt.getOperand(0);
   4064   }
   4065 
   4066   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
   4067                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
   4068   if (TryL)
   4069     return TryL;
   4070 
   4071   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
   4072                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
   4073   if (TryR)
   4074     return TryR;
   4075 
   4076   return nullptr;
   4077 }
   4078 
   4079 SDValue DAGCombiner::visitXOR(SDNode *N) {
   4080   SDValue N0 = N->getOperand(0);
   4081   SDValue N1 = N->getOperand(1);
   4082   EVT VT = N0.getValueType();
   4083 
   4084   // fold vector ops
   4085   if (VT.isVector()) {
   4086     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   4087       return FoldedVOp;
   4088 
   4089     // fold (xor x, 0) -> x, vector edition
   4090     if (ISD::isBuildVectorAllZeros(N0.getNode()))
   4091       return N1;
   4092     if (ISD::isBuildVectorAllZeros(N1.getNode()))
   4093       return N0;
   4094   }
   4095 
   4096   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
   4097   if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
   4098     return DAG.getConstant(0, SDLoc(N), VT);
   4099   // fold (xor x, undef) -> undef
   4100   if (N0.getOpcode() == ISD::UNDEF)
   4101     return N0;
   4102   if (N1.getOpcode() == ISD::UNDEF)
   4103     return N1;
   4104   // fold (xor c1, c2) -> c1^c2
   4105   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   4106   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
   4107   if (N0C && N1C)
   4108     return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
   4109   // canonicalize constant to RHS
   4110   if (isConstantIntBuildVectorOrConstantInt(N0) &&
   4111      !isConstantIntBuildVectorOrConstantInt(N1))
   4112     return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
   4113   // fold (xor x, 0) -> x
   4114   if (isNullConstant(N1))
   4115     return N0;
   4116   // reassociate xor
   4117   if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
   4118     return RXOR;
   4119 
   4120   // fold !(x cc y) -> (x !cc y)
   4121   SDValue LHS, RHS, CC;
   4122   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
   4123     bool isInt = LHS.getValueType().isInteger();
   4124     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
   4125                                                isInt);
   4126 
   4127     if (!LegalOperations ||
   4128         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
   4129       switch (N0.getOpcode()) {
   4130       default:
   4131         llvm_unreachable("Unhandled SetCC Equivalent!");
   4132       case ISD::SETCC:
   4133         return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
   4134       case ISD::SELECT_CC:
   4135         return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
   4136                                N0.getOperand(3), NotCC);
   4137       }
   4138     }
   4139   }
   4140 
   4141   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
   4142   if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
   4143       N0.getNode()->hasOneUse() &&
   4144       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
   4145     SDValue V = N0.getOperand(0);
   4146     SDLoc DL(N0);
   4147     V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
   4148                     DAG.getConstant(1, DL, V.getValueType()));
   4149     AddToWorklist(V.getNode());
   4150     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
   4151   }
   4152 
   4153   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
   4154   if (isOneConstant(N1) && VT == MVT::i1 &&
   4155       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
   4156     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
   4157     if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
   4158       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
   4159       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
   4160       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
   4161       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
   4162       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
   4163     }
   4164   }
   4165   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
   4166   if (isAllOnesConstant(N1) &&
   4167       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
   4168     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
   4169     if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
   4170       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
   4171       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
   4172       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
   4173       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
   4174       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
   4175     }
   4176   }
   4177   // fold (xor (and x, y), y) -> (and (not x), y)
   4178   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
   4179       N0->getOperand(1) == N1) {
   4180     SDValue X = N0->getOperand(0);
   4181     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
   4182     AddToWorklist(NotX.getNode());
   4183     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
   4184   }
   4185   // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
   4186   if (N1C && N0.getOpcode() == ISD::XOR) {
   4187     if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
   4188       SDLoc DL(N);
   4189       return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
   4190                          DAG.getConstant(N1C->getAPIntValue() ^
   4191                                          N00C->getAPIntValue(), DL, VT));
   4192     }
   4193     if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
   4194       SDLoc DL(N);
   4195       return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
   4196                          DAG.getConstant(N1C->getAPIntValue() ^
   4197                                          N01C->getAPIntValue(), DL, VT));
   4198     }
   4199   }
   4200   // fold (xor x, x) -> 0
   4201   if (N0 == N1)
   4202     return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
   4203 
   4204   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
   4205   // Here is a concrete example of this equivalence:
   4206   // i16   x ==  14
   4207   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
   4208   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
   4209   //
   4210   // =>
   4211   //
   4212   // i16     ~1      == 0b1111111111111110
   4213   // i16 rol(~1, 14) == 0b1011111111111111
   4214   //
   4215   // Some additional tips to help conceptualize this transform:
   4216   // - Try to see the operation as placing a single zero in a value of all ones.
   4217   // - There exists no value for x which would allow the result to contain zero.
   4218   // - Values of x larger than the bitwidth are undefined and do not require a
   4219   //   consistent result.
   4220   // - Pushing the zero left requires shifting one bits in from the right.
   4221   // A rotate left of ~1 is a nice way of achieving the desired result.
   4222   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
   4223       && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
   4224     SDLoc DL(N);
   4225     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
   4226                        N0.getOperand(1));
   4227   }
   4228 
   4229   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
   4230   if (N0.getOpcode() == N1.getOpcode())
   4231     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
   4232       return Tmp;
   4233 
   4234   // Simplify the expression using non-local knowledge.
   4235   if (!VT.isVector() &&
   4236       SimplifyDemandedBits(SDValue(N, 0)))
   4237     return SDValue(N, 0);
   4238 
   4239   return SDValue();
   4240 }
   4241 
   4242 /// Handle transforms common to the three shifts, when the shift amount is a
   4243 /// constant.
   4244 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
   4245   SDNode *LHS = N->getOperand(0).getNode();
   4246   if (!LHS->hasOneUse()) return SDValue();
   4247 
   4248   // We want to pull some binops through shifts, so that we have (and (shift))
   4249   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
   4250   // thing happens with address calculations, so it's important to canonicalize
   4251   // it.
   4252   bool HighBitSet = false;  // Can we transform this if the high bit is set?
   4253 
   4254   switch (LHS->getOpcode()) {
   4255   default: return SDValue();
   4256   case ISD::OR:
   4257   case ISD::XOR:
   4258     HighBitSet = false; // We can only transform sra if the high bit is clear.
   4259     break;
   4260   case ISD::AND:
   4261     HighBitSet = true;  // We can only transform sra if the high bit is set.
   4262     break;
   4263   case ISD::ADD:
   4264     if (N->getOpcode() != ISD::SHL)
   4265       return SDValue(); // only shl(add) not sr[al](add).
   4266     HighBitSet = false; // We can only transform sra if the high bit is clear.
   4267     break;
   4268   }
   4269 
   4270   // We require the RHS of the binop to be a constant and not opaque as well.
   4271   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
   4272   if (!BinOpCst) return SDValue();
   4273 
   4274   // FIXME: disable this unless the input to the binop is a shift by a constant.
   4275   // If it is not a shift, it pessimizes some common cases like:
   4276   //
   4277   //    void foo(int *X, int i) { X[i & 1235] = 1; }
   4278   //    int bar(int *X, int i) { return X[i & 255]; }
   4279   SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
   4280   if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
   4281        BinOpLHSVal->getOpcode() != ISD::SRA &&
   4282        BinOpLHSVal->getOpcode() != ISD::SRL) ||
   4283       !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
   4284     return SDValue();
   4285 
   4286   EVT VT = N->getValueType(0);
   4287 
   4288   // If this is a signed shift right, and the high bit is modified by the
   4289   // logical operation, do not perform the transformation. The highBitSet
   4290   // boolean indicates the value of the high bit of the constant which would
   4291   // cause it to be modified for this operation.
   4292   if (N->getOpcode() == ISD::SRA) {
   4293     bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
   4294     if (BinOpRHSSignSet != HighBitSet)
   4295       return SDValue();
   4296   }
   4297 
   4298   if (!TLI.isDesirableToCommuteWithShift(LHS))
   4299     return SDValue();
   4300 
   4301   // Fold the constants, shifting the binop RHS by the shift amount.
   4302   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
   4303                                N->getValueType(0),
   4304                                LHS->getOperand(1), N->getOperand(1));
   4305   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
   4306 
   4307   // Create the new shift.
   4308   SDValue NewShift = DAG.getNode(N->getOpcode(),
   4309                                  SDLoc(LHS->getOperand(0)),
   4310                                  VT, LHS->getOperand(0), N->getOperand(1));
   4311 
   4312   // Create the new binop.
   4313   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
   4314 }
   4315 
   4316 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
   4317   assert(N->getOpcode() == ISD::TRUNCATE);
   4318   assert(N->getOperand(0).getOpcode() == ISD::AND);
   4319 
   4320   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
   4321   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
   4322     SDValue N01 = N->getOperand(0).getOperand(1);
   4323 
   4324     if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
   4325       if (!N01C->isOpaque()) {
   4326         EVT TruncVT = N->getValueType(0);
   4327         SDValue N00 = N->getOperand(0).getOperand(0);
   4328         APInt TruncC = N01C->getAPIntValue();
   4329         TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
   4330         SDLoc DL(N);
   4331 
   4332         return DAG.getNode(ISD::AND, DL, TruncVT,
   4333                            DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00),
   4334                            DAG.getConstant(TruncC, DL, TruncVT));
   4335       }
   4336     }
   4337   }
   4338 
   4339   return SDValue();
   4340 }
   4341 
   4342 SDValue DAGCombiner::visitRotate(SDNode *N) {
   4343   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
   4344   if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
   4345       N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
   4346     SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode());
   4347     if (NewOp1.getNode())
   4348       return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
   4349                          N->getOperand(0), NewOp1);
   4350   }
   4351   return SDValue();
   4352 }
   4353 
   4354 SDValue DAGCombiner::visitSHL(SDNode *N) {
   4355   SDValue N0 = N->getOperand(0);
   4356   SDValue N1 = N->getOperand(1);
   4357   EVT VT = N0.getValueType();
   4358   unsigned OpSizeInBits = VT.getScalarSizeInBits();
   4359 
   4360   // fold vector ops
   4361   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   4362   if (VT.isVector()) {
   4363     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   4364       return FoldedVOp;
   4365 
   4366     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
   4367     // If setcc produces all-one true value then:
   4368     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
   4369     if (N1CV && N1CV->isConstant()) {
   4370       if (N0.getOpcode() == ISD::AND) {
   4371         SDValue N00 = N0->getOperand(0);
   4372         SDValue N01 = N0->getOperand(1);
   4373         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
   4374 
   4375         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
   4376             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
   4377                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
   4378           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
   4379                                                      N01CV, N1CV))
   4380             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
   4381         }
   4382       } else {
   4383         N1C = isConstOrConstSplat(N1);
   4384       }
   4385     }
   4386   }
   4387 
   4388   // fold (shl c1, c2) -> c1<<c2
   4389   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   4390   if (N0C && N1C && !N1C->isOpaque())
   4391     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
   4392   // fold (shl 0, x) -> 0
   4393   if (isNullConstant(N0))
   4394     return N0;
   4395   // fold (shl x, c >= size(x)) -> undef
   4396   if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
   4397     return DAG.getUNDEF(VT);
   4398   // fold (shl x, 0) -> x
   4399   if (N1C && N1C->isNullValue())
   4400     return N0;
   4401   // fold (shl undef, x) -> 0
   4402   if (N0.getOpcode() == ISD::UNDEF)
   4403     return DAG.getConstant(0, SDLoc(N), VT);
   4404   // if (shl x, c) is known to be zero, return 0
   4405   if (DAG.MaskedValueIsZero(SDValue(N, 0),
   4406                             APInt::getAllOnesValue(OpSizeInBits)))
   4407     return DAG.getConstant(0, SDLoc(N), VT);
   4408   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
   4409   if (N1.getOpcode() == ISD::TRUNCATE &&
   4410       N1.getOperand(0).getOpcode() == ISD::AND) {
   4411     SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
   4412     if (NewOp1.getNode())
   4413       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
   4414   }
   4415 
   4416   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
   4417     return SDValue(N, 0);
   4418 
   4419   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
   4420   if (N1C && N0.getOpcode() == ISD::SHL) {
   4421     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
   4422       uint64_t c1 = N0C1->getZExtValue();
   4423       uint64_t c2 = N1C->getZExtValue();
   4424       SDLoc DL(N);
   4425       if (c1 + c2 >= OpSizeInBits)
   4426         return DAG.getConstant(0, DL, VT);
   4427       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
   4428                          DAG.getConstant(c1 + c2, DL, N1.getValueType()));
   4429     }
   4430   }
   4431 
   4432   // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
   4433   // For this to be valid, the second form must not preserve any of the bits
   4434   // that are shifted out by the inner shift in the first form.  This means
   4435   // the outer shift size must be >= the number of bits added by the ext.
   4436   // As a corollary, we don't care what kind of ext it is.
   4437   if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
   4438               N0.getOpcode() == ISD::ANY_EXTEND ||
   4439               N0.getOpcode() == ISD::SIGN_EXTEND) &&
   4440       N0.getOperand(0).getOpcode() == ISD::SHL) {
   4441     SDValue N0Op0 = N0.getOperand(0);
   4442     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
   4443       uint64_t c1 = N0Op0C1->getZExtValue();
   4444       uint64_t c2 = N1C->getZExtValue();
   4445       EVT InnerShiftVT = N0Op0.getValueType();
   4446       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
   4447       if (c2 >= OpSizeInBits - InnerShiftSize) {
   4448         SDLoc DL(N0);
   4449         if (c1 + c2 >= OpSizeInBits)
   4450           return DAG.getConstant(0, DL, VT);
   4451         return DAG.getNode(ISD::SHL, DL, VT,
   4452                            DAG.getNode(N0.getOpcode(), DL, VT,
   4453                                        N0Op0->getOperand(0)),
   4454                            DAG.getConstant(c1 + c2, DL, N1.getValueType()));
   4455       }
   4456     }
   4457   }
   4458 
   4459   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
   4460   // Only fold this if the inner zext has no other uses to avoid increasing
   4461   // the total number of instructions.
   4462   if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
   4463       N0.getOperand(0).getOpcode() == ISD::SRL) {
   4464     SDValue N0Op0 = N0.getOperand(0);
   4465     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
   4466       uint64_t c1 = N0Op0C1->getZExtValue();
   4467       if (c1 < VT.getScalarSizeInBits()) {
   4468         uint64_t c2 = N1C->getZExtValue();
   4469         if (c1 == c2) {
   4470           SDValue NewOp0 = N0.getOperand(0);
   4471           EVT CountVT = NewOp0.getOperand(1).getValueType();
   4472           SDLoc DL(N);
   4473           SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
   4474                                        NewOp0,
   4475                                        DAG.getConstant(c2, DL, CountVT));
   4476           AddToWorklist(NewSHL.getNode());
   4477           return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
   4478         }
   4479       }
   4480     }
   4481   }
   4482 
   4483   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
   4484   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
   4485   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
   4486       cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) {
   4487     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
   4488       uint64_t C1 = N0C1->getZExtValue();
   4489       uint64_t C2 = N1C->getZExtValue();
   4490       SDLoc DL(N);
   4491       if (C1 <= C2)
   4492         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
   4493                            DAG.getConstant(C2 - C1, DL, N1.getValueType()));
   4494       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
   4495                          DAG.getConstant(C1 - C2, DL, N1.getValueType()));
   4496     }
   4497   }
   4498 
   4499   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
   4500   //                               (and (srl x, (sub c1, c2), MASK)
   4501   // Only fold this if the inner shift has no other uses -- if it does, folding
   4502   // this will increase the total number of instructions.
   4503   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
   4504     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
   4505       uint64_t c1 = N0C1->getZExtValue();
   4506       if (c1 < OpSizeInBits) {
   4507         uint64_t c2 = N1C->getZExtValue();
   4508         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
   4509         SDValue Shift;
   4510         if (c2 > c1) {
   4511           Mask = Mask.shl(c2 - c1);
   4512           SDLoc DL(N);
   4513           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
   4514                               DAG.getConstant(c2 - c1, DL, N1.getValueType()));
   4515         } else {