//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "dagcombine"

STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store sequences narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of loads sliced");

static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

static cl::opt<bool>
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                    cl::desc("DAG combiner may split indexing from loads"));

namespace {

  class DAGCombiner {
    SelectionDAG &DAG;
    const TargetLowering &TLI;
    CombineLevel Level;
    CodeGenOpt::Level OptLevel;
    bool LegalOperations = false;
    bool LegalTypes = false;
    bool ForCodeSize;

    /// Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;

    /// Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    /// Used for DAG load/store alias analysis.
    AliasAnalysis *AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the work lists because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
          OptLevel(OL), AA(AA) {
      ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();

      MaximumLegalStoreInBits = 0;
      for (MVT VT : MVT::all_valuetypes())
        if (EVT(VT).isSimple() && VT != MVT::Other &&
            TLI.isTypeLegal(EVT(VT)) &&
            VT.getSizeInBits() >= MaximumLegalStoreInBits)
          MaximumLegalStoreInBits = VT.getSizeInBits();
    }

    /// Add to the worklist making sure its instance is at the back (next to be
    /// processed).
    void AddToWorklist(SDNode *N) {
      assert(N->getOpcode() != ISD::DELETED_NODE &&
             "Deleted Node added to Worklist");

      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:
    unsigned MaximumLegalStoreInBits;

    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getScalarValueSizeInBits();
      APInt Demanded = APInt::getAllOnesValue(BitWidth);
      return SimplifyDemandedBits(Op, Demanded);
    }

    /// Check the specified vector node value to see if it can be simplified or
    /// if things it uses can be simplified as it only uses some of the
    /// elements. If so, return true.
    bool SimplifyDemandedVectorElts(SDValue Op) {
      unsigned NumElts = Op.getValueType().getVectorNumElements();
      APInt Demanded = APInt::getAllOnesValue(NumElts);
      return SimplifyDemandedVectorElts(Op, Demanded);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
                                    bool AssumeSingleUse = false);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success, SDValue() on failure.
    SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
        SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
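    // Illustrative sketch of this contract (a hypothetical routine, not one
    // of the visitors declared below):
    //   SDValue visitFOO(SDNode *N) {
    //     if (/* no profitable fold found */)
    //       return SDValue();                    // no change was made
    //     return DAG.getNode(/* replacement */); // N should become this value
    //   }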
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitUADDO(SDNode *N);
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitUSUBO(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitADDCARRY(SDNode *N);
    SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitSUBCARRY(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitSMULO(SDNode *N);
    SDValue visitUMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitABS(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCCARRY(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitAssertExt(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_ROUND_INREG(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMINNUM(SDNode *N);
    SDValue visitFMAXNUM(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);

    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                           SDValue N1);

    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

    SDValue foldSelectOfConstants(SDNode *N);
    SDValue foldVSelectOfConstants(SDNode *N);
    SDValue foldBinOpIntoSelect(SDNode *BO);
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                              const SDLoc &DL);
    SDValue unfoldMaskedMerge(SDNode *N);
    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans);
    SDValue rebuildSetCC(SDValue N);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                         unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue MatchLoadCombine(SDNode *N);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue splitMergedValStore(StoreSDNode *ST);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue convertBuildVecZextToZext(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
                                  SDValue VecIn2, unsigned LeftIdx);
    SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;

      // Offset from the base ptr.
      int64_t OffsetFromBase;

      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
    };

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode,
                                     SDValue &AddNode,
                                     SDValue &ConstNode);
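
    // Worked example with hypothetical values: for c1 == 4 and c2 == 3,
    // (mul (add x, 4), 3) -> (add (mul x, 3), 12), since 3 * (x + 4) is
    // 3 * x + 12.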

    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT is set
    /// to the type of the loaded value to be extended.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT);

    /// Helper function to calculate whether the given Load/Store can have its
    /// width reduced to ExtVT.
    bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                           EVT &MemVT, unsigned ShAmt = 0);

    /// Used by BackwardsPropagateMask to find suitable loads.
    bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
                           SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                           ConstantSDNode *Mask, SDNode *&NodeToMask);
    /// Attempt to propagate a given AND node back to load leaves so that they
    /// can be combined into narrow loads.
    bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);

    /// Helper function for MergeConsecutiveStores which merges the
    /// component store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                unsigned NumStores);

    /// This is a helper function for MergeConsecutiveStores. When the
    /// source elements of the consecutive stores are all constants or
    /// all extracted vector elements, try to merge them into one
    /// larger store introducing bitcasts if necessary.  \return True
    /// if a merged store was created.
    bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,
                                         bool UseTrunc);

    /// This is a helper function for MergeConsecutiveStores. Stores
    /// that potentially may be merged with St are placed in
    /// StoreNodes. RootNode is a chain predecessor to all store
    /// candidates.
    void getStoreMergeCandidates(StoreSDNode *St,
                                 SmallVectorImpl<MemOpLink> &StoreNodes,
                                 SDNode *&Root);

    /// Helper function for MergeConsecutiveStores. Checks if
    /// candidate stores have indirect dependency through their
    /// operands. RootNode is the predecessor to all stores calculated
    /// by getStoreMergeCandidates and is used to prune the dependency check.
    /// \return True if safe to merge.
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
        SDNode *RootNode);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return true if stores were merged (the affected nodes are stored as a
    /// prefix in \p StoreNodes).
    bool MergeConsecutiveStores(StoreSDNode *St);
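
    // Illustrative example with hypothetical values: four consecutive i8
    // stores of constants at offsets 0..3 from a common base may become a
    // single i32 store of the combined constant, provided the wider store is
    // legal for the target.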

    /// Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if they are not met an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);
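
    // Worked example with hypothetical types and values: truncating
    // (and X:i32, 0x00FFFF00) to i16 becomes (and (trunc X):i16, 0xFF00).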

    /// Helper function to determine whether the target supports operation
    /// given by \p Opcode for type \p VT, that is, whether the operation
    /// is legal or custom before legalizing operations, and whether it is
    /// legal (but not custom) after legalization.
    bool hasOperation(unsigned Opcode, EVT VT) {
      if (LegalOperations)
        return TLI.isOperationLegal(Opcode, VT);
      return TLI.isOperationLegalOrCustom(Opcode, VT);
    }

  public:
    /// Runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue OrigLoad, SDValue ExtLoad,
                         ISD::NodeType ExtType);
  };

/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistRemover(DAGCombiner &dc)
    : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
//  TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//

void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

void DAGCombiner::deleteAndRecombine(SDNode *N) {
  removeFromWorklist(N);

  // If the operands of this node are only used by the node, they will now be
  // dead. Make sure to re-visit them and recursively delete dead nodes.
  for (const SDValue &Op : N->ops())
    // For an operand generating multiple values, one of the values may
    // become dead allowing further simplification (e.g. split index
    // arithmetic from an indexed load).
    if (Op->hasOneUse() || Op->getNumValues() > 1)
      AddToWorklist(Op.getNode());

  DAG.DeleteNode(N);
}

/// Return 1 if we can compute the negated form of the specified expression for
/// the same cost as the expression itself, or 2 if we can compute the negated
/// form more cheaply than the expression itself; returns 0 otherwise.
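// For example (an illustration of the cases handled below): Op == (fneg X)
// negates to X and saves an operation, so the answer is 2; with no signed
// zeros, Op == (fsub A, B) negates to (fsub B, A) at equal cost, so the
// answer is 1.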
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
                               const TargetLowering &TLI,
                               const TargetOptions *Options,
                               unsigned Depth = 0) {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return 2;

  // Don't allow anything with multiple uses unless we know it is free.
  EVT VT = Op.getValueType();
  const SDNodeFlags Flags = Op->getFlags();
  if (!Op.hasOneUse())
    if (!(Op.getOpcode() == ISD::FP_EXTEND &&
          TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
      return 0;

  // Don't recurse exponentially.
  if (Depth > 6) return 0;

  switch (Op.getOpcode()) {
  default: return 0;
  case ISD::ConstantFP: {
    if (!LegalOperations)
      return 1;

    // Don't invert constant FP values after legalization unless the target says
    // the negated constant is legal.
    return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
      TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
  }
  case ISD::FADD:
    if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
      return 0;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
      return 0;

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                    Options, Depth + 1))
      return V;
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                              Depth + 1);
  case ISD::FSUB:
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options->NoSignedZerosFPMath &&
        !Flags.hasNoSignedZeros())
      return 0;

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return 1;

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                    Options, Depth + 1))
      return V;

    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                              Depth + 1);

  case ISD::FP_EXTEND:
  case ISD::FP_ROUND:
  case ISD::FSIN:
    return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
                              Depth + 1);
  }
}

/// If isNegatibleForFree returns a non-zero value, return the newly negated
/// expression.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, unsigned Depth = 0) {
  const TargetOptions &Options = DAG.getTarget().Options;
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");

  const SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::FADD:
    assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1), Flags);

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1));
  case ISD::FP_ROUND:
    return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(1));
  }
}

// APInts must be the same size for most operations; this helper
// function zero-extends the shorter of the pair so that they match.
// We provide an Offset so that we can create bitwidths that won't overflow.
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
  LHS = LHS.zextOrSelf(Bits);
  RHS = RHS.zextOrSelf(Bits);
}

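// Worked example with hypothetical widths: given an 8-bit LHS, a 16-bit RHS,
// and Offset == 1, both operands are zero-extended to 17 bits, leaving one
// bit of headroom so a following add cannot overflow.
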
// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                                    SDValue &CC) const {
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(2);
    return true;
  }

  if (N.getOpcode() != ISD::SELECT_CC ||
      !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
      !TLI.isConstFalseVal(N.getOperand(3).getNode()))
    return false;

  if (TLI.getBooleanContents(N.getValueType()) ==
      TargetLowering::UndefinedBooleanContent)
    return false;

  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  CC  = N.getOperand(4);
  return true;
}

/// Return true if this is a SetCC-equivalent operation with only one use.
/// If this is true, it allows the users to invert the operation for free when
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
  SDValue N0, N1, N2;
  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
    return true;
  return false;
}

static SDValue peekThroughBitcast(SDValue V) {
  while (V.getOpcode() == ISD::BITCAST)
    V = V.getOperand(0);
  return V;
}

// Returns the SDNode if it is a constant float BuildVector
// or constant float.
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
  if (isa<ConstantFPSDNode>(N))
    return N.getNode();
  if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
    return N.getNode();
  return nullptr;
}

// Determines if it is a constant integer or a build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);
  if (N.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))
      return false;
  }
  return true;
}
    903 
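// For example (illustrative): a BUILD_VECTOR of i32 constants such as
// (1, undef, 3, undef) qualifies, while one whose constant operands are wider
// than its element type (implicit truncation) fails the bitwidth check above.
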
// Determines if it is a constant null integer or a splatted vector of a
// constant null integer (with no undefs).
// Build vector implicit truncation is not an issue for null values.
static bool isNullConstantOrNullSplatConstant(SDValue N) {
  // TODO: may want to use peekThroughBitcast() here.
  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
    return Splat->isNullValue();
  return false;
}

// Determines if it is a constant integer of one or a splatted vector of a
// constant integer of one (with no undefs).
// Do not permit build vector implicit truncation.
static bool isOneConstantOrOneSplatConstant(SDValue N) {
  // TODO: may want to use peekThroughBitcast() here.
  unsigned BitWidth = N.getScalarValueSizeInBits();
  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
    return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
  return false;
}

// Determines if it is a constant integer of all ones or a splatted vector of a
// constant integer of all ones (with no undefs).
// Do not permit build vector implicit truncation.
static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
  N = peekThroughBitcast(N);
  unsigned BitWidth = N.getScalarValueSizeInBits();
  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
    return Splat->isAllOnesValue() &&
           Splat->getAPIntValue().getBitWidth() == BitWidth;
  return false;
}

// Determines if a BUILD_VECTOR is composed of all constants, possibly mixed
// with undefs.
static bool isAnyConstantBuildVector(const SDNode *N) {
  return ISD::isBuildVectorOfConstantSDNodes(N) ||
         ISD::isBuildVectorOfConstantFPSDNodes(N);
}

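// Reassociate a commutative binary operation to fold constants together,
// e.g. (op (op x, c1), c2) -> (op x, (op c1, c2)); the individual folds are
// annotated inline below.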
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1) {
  EVT VT = N0.getValueType();
  if (N0.getOpcode() == Opc) {
    if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
      if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
        return SDValue();
      }
      if (N0.hasOneUse()) {
        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff (op x, c1)
        // has one use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
      }
    }
  }

  if (N1.getOpcode() == Opc) {
    if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
      if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
        return SDValue();
      }
      if (N1.hasOneUse()) {
        // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff (op y, c1)
        // has one use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
      }
    }
  }

  return SDValue();
}

SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].getNode()->dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}

void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}

/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  KnownBits Known;
  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true.
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
                                             bool AssumeSingleUse) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownUndef, KnownZero;
  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
                                      0, AssumeSingleUse))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}

SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}

SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}

SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}

/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
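// Illustrative sketch of the transform below (a hypothetical i16 add promoted
// to i32): (add a:i16, b:i16) becomes
//   (trunc:i16 (add:i32 (any_extend a), (any_extend b))).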
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If the operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult the target on whether it is a good idea to promote this operation
  // and what type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need
    // additional replacements if there are additional uses.
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If the operands have a use ordering, make sure we deal with
    // the predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    return Op;
  }
  return SDValue();
}

/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
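/// A sketch of the intended rewrite (again a hypothetical i16-on-x86 case):
/// an undesirable (i16 srl x, c) becomes
///   (i16 truncate (i32 srl (zero_extend x), c))
/// where the extension kind (sext for sra, zext for srl, anyext otherwise)
/// preserves the value of the shifted-in bits.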
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If the operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult the target on whether it is a good idea to promote this operation
  // and what type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    SDLoc DL(Op);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    AddToWorklist(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}

SDValue DAGCombiner::PromoteExtend(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If the operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult the target on whether it is a good idea to promote this operation
  // and what type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
  }
  return SDValue();
}

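/// Promote an unindexed integer load of an undesirable type to a wider,
/// target-preferred one, rewiring both the value and the chain uses. Returns
/// true if the transform was applied. A minimal sketch (hypothetical
/// i16-on-x86 case):
///   (i16 load p) -> (i16 truncate (i32 extload p))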
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If the operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult the target on whether it is a good idea to promote this operation
  // and what type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.getNode()->dump(&DAG); dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}

/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes that have had a user deleted to the worklist, as
/// they may now have only one use and be subject to other combines.
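///
/// For example (illustrative): deleting a dead add may leave its operands
/// unused; any operand whose last user was just removed is deleted in turn,
/// while operands that still have uses are re-queued, since they may now
/// combine differently.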
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  if (!N->use_empty())
    return false;

  SmallSetVector<SDNode *, 16> Nodes;
  Nodes.insert(N);
  do {
    N = Nodes.pop_back_val();
    if (!N)
      continue;

    if (N->use_empty()) {
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
      DAG.DeleteNode(N);
    } else {
      AddToWorklist(N);
    }
  } while (!Nodes.empty());
  return true;
}

//===----------------------------------------------------------------------===//
//  Main DAG Combiner implementation
//===----------------------------------------------------------------------===//

void DAGCombiner::Run(CombineLevel AtLevel) {
  // Set the instance variables, so that the various visit routines may use them.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes) that adds a reference
  // to the root node, preventing it from being deleted and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist.
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}

SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::UADDO:              return visitUADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::USUBO:              return visitUSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext:         return visitAssertExt(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  }
  return SDValue();
}

SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If still nothing happened, try promoting the operation.
  if (!RV.getNode()) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try to eliminate it if the commuted
  // version is already present in the DAG.
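  // Example (illustrative): if both (add x, y) and (add y, x) exist, the
  // commuted twin found via getNodeIfExists replaces this node, so both
  // uses end up sharing one computation.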
  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
      SDValue Ops[] = {N1, N0};
      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
                                            N->getFlags());
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}

/// Given a node, return its input chain if it has one, otherwise return a null
/// SDValue.
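/// For example, a store's chain is its first operand; other nodes may carry
/// the chain as their last or a middle operand, hence the three checks below.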
static SDValue getInputChainForNode(SDNode *N) {
  if (unsigned NumOps = N->getNumOperands()) {
    if (N->getOperand(0).getValueType() == MVT::Other)
      return N->getOperand(0);
    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
      return N->getOperand(NumOps-1);
    for (unsigned i = 1; i < NumOps-1; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other)
        return N->getOperand(i);
  }
  return SDValue();
}

SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Don't simplify token factors if optnone.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors. TFs grows as new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Remove nodes that are chained to another node in the list. Do so by
  // walking up chains breadth-first, stopping when we've seen another operand.
  // In general we must climb to the EntryNode, but we can exit early if we
  // find all remaining work is associated with just one operand as no further
  // pruning is possible.
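  //
  // For example (illustrative): given TokenFactor(A, B) where B is a store
  // whose chain eventually reaches A, the walk up from B hits A, so A is
  // redundant and can be pruned from Ops.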

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains;
  bool DidPruneOps = false;

  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Re-mark any
    // searches associated with it as coming from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist entries from OrigOpNumber to OpNumber.
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain.
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops to prune.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate
      // without hitting another operand's search. Prevent us from marking
      // this operand considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around, then replace the token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
      } else {
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
      }
    }
    return Result;
  }
  return SDValue();
}

/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorklistRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorklist(N);
  do {
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
  } while (!N->use_empty());
  deleteAndRecombine(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}

/// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
/// ConstantSDNode pointer, else nullptr.
static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
}

SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
  auto BinOpcode = BO->getOpcode();
  assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
          BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
          BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
          BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
          BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
          BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
          BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
          BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
          BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
         "Unexpected binary operator");

  // Don't do this unless the old select is going away. We want to eliminate the
  // binary operator, not replace a binop with a select.
  // TODO: Handle ISD::SELECT_CC.
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  SDValue CT = Sel.getOperand(1);
  if (!isConstantOrConstantVector(CT, true) &&
      !isConstantFPBuildVectorOrConstantFP(CT))
    return SDValue();

  SDValue CF = Sel.getOperand(2);
  if (!isConstantOrConstantVector(CF, true) &&
      !isConstantFPBuildVectorOrConstantFP(CF))
    return SDValue();

  // Bail out if any constants are opaque because we can't constant fold those.
  // The exception is "and" and "or" with either 0 or -1, in which case we can
  // propagate non-constant operands into the select, i.e.:
  // and (select Cond, 0, -1), X --> select Cond, 0, X
  // or X, (select Cond, -1, 0) --> select Cond, -1, X
  bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
                         (isNullConstantOrNullSplatConstant(CT) ||
                          isAllOnesConstantOrAllOnesSplatConstant(CT)) &&
                         (isNullConstantOrNullSplatConstant(CF) ||
                          isAllOnesConstantOrAllOnesSplatConstant(CF));

  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
  if (!CanFoldNonConst &&
      !isConstantOrConstantVector(CBO, true) &&
      !isConstantFPBuildVectorOrConstantFP(CBO))
    return SDValue();

  EVT VT = Sel.getValueType();

  // For shifts, the value and the shift amount may have different VTs. For
  // instance, on x86 the shift amount is i8 regardless of the LHS type. Bail
  // out if we have swapped operands and the value types do not match. NB: x86
  // is fine when the operands are not swapped and the shift amount VT is no
  // bigger than the shifted value's. TODO: it is possible to check for a shift
  // operation, correct the VTs, and still perform the optimization on x86 if
  // needed.
  if (SelOpNo && VT != CBO.getValueType())
    return SDValue();

  // We have a select-of-constants followed by a binary operator with a
  // constant. Eliminate the binop by pulling the constant math into the select.
  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
  SDLoc DL(Sel);
  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
                          : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
  if (!CanFoldNonConst && !NewCT.isUndef() &&
      !isConstantOrConstantVector(NewCT, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCT))
    return SDValue();

  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
                          : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
  if (!CanFoldNonConst && !NewCF.isUndef() &&
      !isConstantOrConstantVector(NewCF, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCF))
    return SDValue();

  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
}

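/// Fold the add/sub of a constant with a zext of an inverted low-bit test
/// into a sub/add of the low bit with an adjusted constant; the exact
/// patterns handled are spelled out in the inline comments below.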
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
         "Expecting add or sub");

  // Match a constant operand and a zext operand for the math instruction:
  // add Z, C
  // sub C, Z
  bool IsAdd = N->getOpcode() == ISD::ADD;
  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
  auto *CN = dyn_cast<ConstantSDNode>(C);
  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  // Match the zext operand as a setcc of a boolean.
  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
      Z.getOperand(0).getValueType() != MVT::i1)
    return SDValue();

  // Match the compare as: setcc (X & 1), 0, eq.
  SDValue SetCC = Z.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
      SetCC.getOperand(0).getOpcode() != ISD::AND ||
      !isOneConstant(SetCC.getOperand(0).getOperand(1)))
    return SDValue();

  // We are adding/subtracting a constant and an inverted low bit. Turn that
  // into a subtract/add of the low bit with incremented/decremented constant:
  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
  EVT VT = C.getValueType();
  SDLoc DL(N);
  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
                       DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
}

/// Try to fold an add/sub of a constant with a logical shift of a 'not' of
/// the sign bit into a shift and an add with a different constant.
static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
         "Expecting add or sub");

  // We need a constant operand for the add/sub, and the other operand is a
  // logical shift right: add (srl), C or sub C, (srl).
  bool IsAdd = N->getOpcode() == ISD::ADD;
  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
  ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
  if (!C || ShiftOp.getOpcode() != ISD::SRL)
    return SDValue();

  // The shift must be of a 'not' value.
  // TODO: Use isBitwiseNot() if it works with vectors.
  SDValue Not = ShiftOp.getOperand(0);
  if (!Not.hasOneUse() || Not.getOpcode() != ISD::XOR ||
      !isAllOnesConstantOrAllOnesSplatConstant(Not.getOperand(1)))
    return SDValue();

  // The shift must be moving the sign bit to the least-significant-bit.
  EVT VT = ShiftOp.getValueType();
  SDValue ShAmt = ShiftOp.getOperand(1);
  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
  if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
    return SDValue();

  // Eliminate the 'not' by adjusting the shift and add/sub constant:
  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
  SDLoc DL(N);
  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
  APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
}

SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;

  if (N1.isUndef())
    return N1;

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
      // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
                         N0.getOperand(1));
    }

    // add (sext i1 X), 1 -> zext (not i1 X)
    // We don't transform this pattern:
    //   add (zext i1 X), -1 -> sext (not i1 X)
    // because most (?) targets generate better code for the zext form.
    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
        isOneConstantOrOneSplatConstant(N1)) {
      SDValue X = N0.getOperand(0);
      if ((!LegalOperations ||
           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
          X.getScalarValueSizeInBits() == 1) {
        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
      }
    }

    // Undo the add -> or combine to merge constant offsets from a frame index.
    if (N0.getOpcode() == ISD::OR &&
        isa<FrameIndexSDNode>(N0.getOperand(0)) &&
        isa<ConstantSDNode>(N0.getOperand(1)) &&
        DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
      SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
    }
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate add
  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
    return RADD;

  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
                       N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  // fold (add (xor a, -1), 1) -> (sub 0, a)
  if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1))
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                       N0.getOperand(0));

  if (SDValue Combined = visitADDLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLike(N1, N0, N))
    return Combined;

  return SDValue();
}

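/// Peel away TRUNCATE/ZERO_EXTEND/AND-with-1 wrappers (left behind by
/// legalization) and return the underlying value if it is the carry result of
/// an ADDCARRY/SUBCARRY/UADDO/USUBO node whose boolean is known to be 0 or 1;
/// otherwise return a null SDValue.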
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
  bool Masked = false;

  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
  while (true) {
    if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
      V = V.getOperand(0);
      continue;
    }

    if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
      Masked = true;
      V = V.getOperand(0);
      continue;
    }

    break;
  }

  // If this is not a carry, return.
  if (V.getResNo() != 1)
    return SDValue();

  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
      V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
    return SDValue();

  // If the result is masked, then it is safe to return no matter what kind of
  // bool the target uses. If it isn't, then we need to make sure the bool
  // value is either 0 or 1 and not some other value.
  if (Masked ||
      TLI.getBooleanContents(V.getValueType()) ==
          TargetLoweringBase::ZeroOrOneBooleanContent)
    return V;

  return SDValue();
}

SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
  EVT VT = N0.getValueType();
  SDLoc DL(LocReference);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0,
                       DAG.getNode(ISD::SHL, DL, VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    if (NumSignBits == DestBits &&
        isOneConstantOrOneSplatConstant(N1->getOperand(1)))
      return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
  }

  // add (sext i1), X -> sub X, (zext i1)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
      N1.getResNo() == 0)
    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
                       N0, N1.getOperand(0), N1.getOperand(2));

  // (add X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), N0,
                         DAG.getConstant(0, DL, VT), Carry);

  return SDValue();
}

SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // canonicalize constant to RHS.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        DL, MVT::Glue));

  // If it cannot overflow, transform into an add.
  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  return SDValue();
}

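/// Flip a boolean value \p V by XOR'ing it with the target's canonical 'true'
/// constant: 1 for zero-or-one (and undefined) boolean contents, -1 for
/// zero-or-negative-one contents.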
static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
                           SelectionDAG &DAG, const TargetLowering &TLI) {
  SDValue Cst;
  switch (TLI.getBooleanContents(VT)) {
  case TargetLowering::ZeroOrOneBooleanContent:
  case TargetLowering::UndefinedBooleanContent:
    Cst = DAG.getConstant(1, DL, VT);
    break;
  case TargetLowering::ZeroOrNegativeOneBooleanContent:
    Cst = DAG.getConstant(-1, DL, VT);
    break;
  }

  return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
}

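/// Return true if \p V is an XOR that flips a boolean: (xor X, C) where C is
/// the target's canonical 'true' constant (or, for undefined boolean
/// contents, any constant with the low bit set).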
   2336 static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
   2337   if (V.getOpcode() != ISD::XOR) return false;
   2338   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1));
   2339   if (!Const) return false;
   2340 
   2341   switch(TLI.getBooleanContents(VT)) {
   2342     case TargetLowering::ZeroOrOneBooleanContent:
   2343       return Const->isOne();
   2344     case TargetLowering::ZeroOrNegativeOneBooleanContent:
   2345       return Const->isAllOnesValue();
   2346     case TargetLowering::UndefinedBooleanContent:
   2347       return (Const->getAPIntValue() & 0x01) == 1;
   2348   }
   2349   llvm_unreachable("Unsupported boolean content");
   2350 }
   2351 
   2352 SDValue DAGCombiner::visitUADDO(SDNode *N) {
   2353   SDValue N0 = N->getOperand(0);
   2354   SDValue N1 = N->getOperand(1);
   2355   EVT VT = N0.getValueType();
   2356   if (VT.isVector())
   2357     return SDValue();
   2358 
   2359   EVT CarryVT = N->getValueType(1);
   2360   SDLoc DL(N);
   2361 
   2362   // If the flag result is dead, turn this into an ADD.
   2363   if (!N->hasAnyUseOfValue(1))
   2364     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
   2365                      DAG.getUNDEF(CarryVT));
   2366 
   2367   // canonicalize constant to RHS.
   2368   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   2369   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   2370   if (N0C && !N1C)
   2371     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
   2372 
   2373   // fold (uaddo x, 0) -> x + no carry out
   2374   if (isNullConstant(N1))
   2375     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
   2376 
   2377   // If it cannot overflow, transform into an add.
   2378   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
   2379     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
   2380                      DAG.getConstant(0, DL, CarryVT));
   2381 
   2382   // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
   2383   if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1)) {
   2384     SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
   2385                               DAG.getConstant(0, DL, VT),
   2386                               N0.getOperand(0));
   2387     return CombineTo(N, Sub,
   2388                      flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
   2389   }
   2390 
   2391   if (SDValue Combined = visitUADDOLike(N0, N1, N))
   2392     return Combined;
   2393 
   2394   if (SDValue Combined = visitUADDOLike(N1, N0, N))
   2395     return Combined;
   2396 
   2397   return SDValue();
   2398 }
   2399 
   2400 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
   2401   auto VT = N0.getValueType();
   2402 
   2403   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
   2404   // If Y + 1 cannot overflow.
   2405   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
   2406     SDValue Y = N1.getOperand(0);
   2407     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
   2408     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
   2409       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
   2410                          N1.getOperand(2));
   2411   }
   2412 
   2413   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
   2414   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
   2415     if (SDValue Carry = getAsCarry(TLI, N1))
   2416       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
   2417                          DAG.getConstant(0, SDLoc(N), VT), Carry);
   2418 
   2419   return SDValue();
   2420 }
   2421 
   2422 SDValue DAGCombiner::visitADDE(SDNode *N) {
   2423   SDValue N0 = N->getOperand(0);
   2424   SDValue N1 = N->getOperand(1);
   2425   SDValue CarryIn = N->getOperand(2);
   2426 
   2427   // canonicalize constant to RHS
   2428   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   2429   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   2430   if (N0C && !N1C)
   2431     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
   2432                        N1, N0, CarryIn);
   2433 
   2434   // fold (adde x, y, false) -> (addc x, y)
   2435   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
   2436     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
   2437 
   2438   return SDValue();
   2439 }
   2440 
   2441 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
   2442   SDValue N0 = N->getOperand(0);
   2443   SDValue N1 = N->getOperand(1);
   2444   SDValue CarryIn = N->getOperand(2);
   2445   SDLoc DL(N);
   2446 
   2447   // canonicalize constant to RHS
   2448   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   2449   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   2450   if (N0C && !N1C)
   2451     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
   2452 
   2453   // fold (addcarry x, y, false) -> (uaddo x, y)
   2454   if (isNullConstant(CarryIn)) {
   2455     if (!LegalOperations ||
   2456         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
   2457       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
   2458   }
   2459 
   2460   EVT CarryVT = CarryIn.getValueType();
   2461 
   2462   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
   2463   if (isNullConstant(N0) && isNullConstant(N1)) {
   2464     EVT VT = N0.getValueType();
   2465     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
   2466     AddToWorklist(CarryExt.getNode());
   2467     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
   2468                                     DAG.getConstant(1, DL, VT)),
   2469                      DAG.getConstant(0, DL, CarryVT));
   2470   }
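          // Illustration: on targets that encode booleans as 0/-1, the
          // bool-extended CarryIn can be 0 or -1 in VT; masking with 1
          // normalizes that to an arithmetic 0 or 1, and the carry-out is
          // always 0 because 0 + 0 + carry cannot wrap.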
   2471 
   2472   // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
   2473   if (isBitwiseNot(N0) && isNullConstant(N1) &&
   2474       isBooleanFlip(CarryIn, CarryVT, TLI)) {
   2475     SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
   2476                               DAG.getConstant(0, DL, N0.getValueType()),
   2477                               N0.getOperand(0), CarryIn.getOperand(0));
   2478     return CombineTo(N, Sub,
   2479                      flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
   2480   }
   2481 
   2482   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
   2483     return Combined;
   2484 
   2485   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
   2486     return Combined;
   2487 
   2488   return SDValue();
   2489 }
   2490 
   2491 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
   2492                                        SDNode *N) {
   2493   // Iff the flag result is dead:
   2494   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
   2495   if ((N0.getOpcode() == ISD::ADD ||
   2496        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
   2497       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
   2498     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
   2499                        N0.getOperand(0), N0.getOperand(1), CarryIn);
   2500 
   2501   /**
   2502    * When one of the addcarry arguments is itself a carry, we may be facing
   2503    * a diamond carry propagation, in which case we try to transform the DAG
   2504    * to ensure linear carry propagation if that is possible.
   2505    *
   2506    * We are trying to get:
   2507    *   (addcarry X, 0, (addcarry A, B, Z):Carry)
   2508    */
   2509   if (auto Y = getAsCarry(TLI, N1)) {
   2510     /**
   2511      *            (uaddo A, B)
   2512      *             /       \
   2513      *          Carry      Sum
   2514      *            |          \
   2515      *            | (addcarry *, 0, Z)
   2516      *            |       /
   2517      *             \   Carry
   2518      *              |   /
   2519      * (addcarry X, *, *)
   2520      */
   2521     if (Y.getOpcode() == ISD::UADDO &&
   2522         CarryIn.getResNo() == 1 &&
   2523         CarryIn.getOpcode() == ISD::ADDCARRY &&
   2524         isNullConstant(CarryIn.getOperand(1)) &&
   2525         CarryIn.getOperand(0) == Y.getValue(0)) {
   2526       auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
   2527                               Y.getOperand(0), Y.getOperand(1),
   2528                               CarryIn.getOperand(2));
   2529       AddToWorklist(NewY.getNode());
   2530       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
   2531                          DAG.getConstant(0, SDLoc(N), N0.getValueType()),
   2532                          NewY.getValue(1));
   2533     }
   2534   }
   2535 
   2536   return SDValue();
   2537 }
   2538 
   2539 // Since it may not be valid to emit a fold to zero for vector initializers,
   2540 // check that we can before folding.
   2541 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
   2542                              SelectionDAG &DAG, bool LegalOperations,
   2543                              bool LegalTypes) {
   2544   if (!VT.isVector())
   2545     return DAG.getConstant(0, DL, VT);
   2546   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
   2547     return DAG.getConstant(0, DL, VT);
   2548   return SDValue();
   2549 }
   2550 
   2551 SDValue DAGCombiner::visitSUB(SDNode *N) {
   2552   SDValue N0 = N->getOperand(0);
   2553   SDValue N1 = N->getOperand(1);
   2554   EVT VT = N0.getValueType();
   2555   SDLoc DL(N);
   2556 
   2557   // fold vector ops
   2558   if (VT.isVector()) {
   2559     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   2560       return FoldedVOp;
   2561 
   2562     // fold (sub x, 0) -> x, vector edition
   2563     if (ISD::isBuildVectorAllZeros(N1.getNode()))
   2564       return N0;
   2565   }
   2566 
   2567   // fold (sub x, x) -> 0
   2568   // FIXME: Refactor this and xor and other similar operations together.
   2569   if (N0 == N1)
   2570     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
   2571   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
   2572       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
   2573     // fold (sub c1, c2) -> c1-c2
   2574     return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
   2575                                       N1.getNode());
   2576   }
   2577 
   2578   if (SDValue NewSel = foldBinOpIntoSelect(N))
   2579     return NewSel;
   2580 
   2581   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
   2582 
   2583   // fold (sub x, c) -> (add x, -c)
   2584   if (N1C) {
   2585     return DAG.getNode(ISD::ADD, DL, VT, N0,
   2586                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
   2587   }
   2588 
   2589   if (isNullConstantOrNullSplatConstant(N0)) {
   2590     unsigned BitWidth = VT.getScalarSizeInBits();
   2591     // Right-shifting everything out but the sign bit followed by negation is
   2592     // the same as flipping arithmetic/logical shift type without the negation:
   2593     // -(X >>u 31) -> (X >>s 31)
   2594     // -(X >>s 31) -> (X >>u 31)
   2595     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
   2596       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
   2597       if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
   2598         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
   2599         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
   2600           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
   2601       }
   2602     }
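            // Illustration for i32: (X >>u 31) is the sign bit as 0 or 1,
            // (X >>s 31) is the sign bit as 0 or -1, so negating either form
            // yields the other and the explicit 0-X node can be dropped.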
   2603 
   2604     // 0 - X --> 0 if the sub is NUW.
   2605     if (N->getFlags().hasNoUnsignedWrap())
   2606       return N0;
   2607 
   2608     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
   2609       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
   2610       // N1 must be 0 because negating the minimum signed value is undefined.
   2611       if (N->getFlags().hasNoSignedWrap())
   2612         return N0;
   2613 
   2614       // 0 - X --> X if X is 0 or the minimum signed value.
   2615       return N1;
   2616     }
   2617   }
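          // Illustration for i8: if all bits of N1 except the sign bit are
          // known zero, N1 is either 0 or -128. Since 0 - 0 == 0 and
          // 0 - (-128) wraps back to -128 in two's complement, returning N1
          // itself is correct in both cases.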
   2618 
   2619   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
   2620   if (isAllOnesConstantOrAllOnesSplatConstant(N0))
   2621     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
   2622 
   2623   // fold (A - (0-B)) -> A+B
   2624   if (N1.getOpcode() == ISD::SUB &&
   2625       isNullConstantOrNullSplatConstant(N1.getOperand(0)))
   2626     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
   2627 
   2628   // fold A-(A-B) -> B
   2629   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
   2630     return N1.getOperand(1);
   2631 
   2632   // fold (A+B)-A -> B
   2633   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
   2634     return N0.getOperand(1);
   2635 
   2636   // fold (A+B)-B -> A
   2637   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
   2638     return N0.getOperand(0);
   2639 
   2640   // fold C2-(A+C1) -> (C2-C1)-A
   2641   if (N1.getOpcode() == ISD::ADD) {
   2642     SDValue N11 = N1.getOperand(1);
   2643     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
   2644         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
   2645       SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
   2646       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
   2647     }
   2648   }
   2649 
   2650   // fold ((A+(B+or-C))-B) -> A+or-C
   2651   if (N0.getOpcode() == ISD::ADD &&
   2652       (N0.getOperand(1).getOpcode() == ISD::SUB ||
   2653        N0.getOperand(1).getOpcode() == ISD::ADD) &&
   2654       N0.getOperand(1).getOperand(0) == N1)
   2655     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
   2656                        N0.getOperand(1).getOperand(1));
   2657 
   2658   // fold ((A+(C+B))-B) -> A+C
   2659   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
   2660       N0.getOperand(1).getOperand(1) == N1)
   2661     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
   2662                        N0.getOperand(1).getOperand(0));
   2663 
   2664   // fold ((A-(B-C))-C) -> A-B
   2665   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
   2666       N0.getOperand(1).getOperand(1) == N1)
   2667     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
   2668                        N0.getOperand(1).getOperand(0));
   2669 
   2670   // fold (A-(B-C)) -> A+(C-B)
   2671   if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
   2672     return DAG.getNode(ISD::ADD, DL, VT, N0,
   2673                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
   2674                                    N1.getOperand(0)));
   2675 
   2676   // fold (X - (-Y * Z)) -> (X + (Y * Z))
   2677   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
   2678     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
   2679         isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) {
   2680       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
   2681                                 N1.getOperand(0).getOperand(1),
   2682                                 N1.getOperand(1));
   2683       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
   2684     }
   2685     if (N1.getOperand(1).getOpcode() == ISD::SUB &&
   2686         isNullConstantOrNullSplatConstant(N1.getOperand(1).getOperand(0))) {
   2687       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
   2688                                 N1.getOperand(0),
   2689                                 N1.getOperand(1).getOperand(1));
   2690       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
   2691     }
   2692   }
   2693 
   2694   // If either operand of a sub is undef, the result is undef
   2695   if (N0.isUndef())
   2696     return N0;
   2697   if (N1.isUndef())
   2698     return N1;
   2699 
   2700   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
   2701     return V;
   2702 
   2703   if (SDValue V = foldAddSubOfSignBit(N, DAG))
   2704     return V;
   2705 
   2706   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
   2707   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
   2708     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
   2709       SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
   2710       SDValue S0 = N1.getOperand(0);
   2711       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
   2712         unsigned OpSizeInBits = VT.getScalarSizeInBits();
   2713         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
   2714           if (C->getAPIntValue() == (OpSizeInBits - 1))
   2715             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
   2716       }
   2717     }
   2718   }
   2719 
   2720   // If the relocation model supports it, consider symbol offsets.
   2721   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
   2722     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
   2723       // fold (sub Sym, c) -> Sym-c
   2724       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
   2725         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
   2726                                     GA->getOffset() -
   2727                                         (uint64_t)N1C->getSExtValue());
   2728       // fold (sub Sym+c1, Sym+c2) -> c1-c2
   2729       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
   2730         if (GA->getGlobal() == GB->getGlobal())
   2731           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
   2732                                  DL, VT);
   2733     }
   2734 
   2735   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
   2736   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
   2737     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
   2738     if (TN->getVT() == MVT::i1) {
   2739       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
   2740                                  DAG.getConstant(1, DL, VT));
   2741       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
   2742     }
   2743   }
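          // Illustration: sign_extend_inreg(Y, i1) evaluates to 0 or -1
          // depending on Y's low bit, so X - (0 or -1) equals X + (0 or 1),
          // which is exactly X + (Y & 1).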
   2744 
   2745   // Prefer an add for more folding potential and possibly better codegen:
   2746   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
   2747   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
   2748     SDValue ShAmt = N1.getOperand(1);
   2749     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
   2750     if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
   2751       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
   2752       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
   2753     }
   2754   }
   2755 
   2756   return SDValue();
   2757 }
   2758 
   2759 SDValue DAGCombiner::visitSUBC(SDNode *N) {
   2760   SDValue N0 = N->getOperand(0);
   2761   SDValue N1 = N->getOperand(1);
   2762   EVT VT = N0.getValueType();
   2763   SDLoc DL(N);
   2764 
   2765   // If the flag result is dead, turn this into a SUB.
   2766   if (!N->hasAnyUseOfValue(1))
   2767     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
   2768                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
   2769 
   2770   // fold (subc x, x) -> 0 + no borrow
   2771   if (N0 == N1)
   2772     return CombineTo(N, DAG.getConstant(0, DL, VT),
   2773                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
   2774 
   2775   // fold (subc x, 0) -> x + no borrow
   2776   if (isNullConstant(N1))
   2777     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
   2778 
   2779   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
   2780   if (isAllOnesConstant(N0))
   2781     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
   2782                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
   2783 
   2784   return SDValue();
   2785 }
   2786 
   2787 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
   2788   SDValue N0 = N->getOperand(0);
   2789   SDValue N1 = N->getOperand(1);
   2790   EVT VT = N0.getValueType();
   2791   if (VT.isVector())
   2792     return SDValue();
   2793 
   2794   EVT CarryVT = N->getValueType(1);
   2795   SDLoc DL(N);
   2796 
   2797   // If the flag result is dead, turn this into a SUB.
   2798   if (!N->hasAnyUseOfValue(1))
   2799     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
   2800                      DAG.getUNDEF(CarryVT));
   2801 
   2802   // fold (usubo x, x) -> 0 + no borrow
   2803   if (N0 == N1)
   2804     return CombineTo(N, DAG.getConstant(0, DL, VT),
   2805                      DAG.getConstant(0, DL, CarryVT));
   2806 
   2807   // fold (usubo x, 0) -> x + no borrow
   2808   if (isNullConstant(N1))
   2809     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
   2810 
   2811   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
   2812   if (isAllOnesConstant(N0))
   2813     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
   2814                      DAG.getConstant(0, DL, CarryVT));
   2815 
   2816   return SDValue();
   2817 }
   2818 
   2819 SDValue DAGCombiner::visitSUBE(SDNode *N) {
   2820   SDValue N0 = N->getOperand(0);
   2821   SDValue N1 = N->getOperand(1);
   2822   SDValue CarryIn = N->getOperand(2);
   2823 
   2824   // fold (sube x, y, false) -> (subc x, y)
   2825   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
   2826     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
   2827 
   2828   return SDValue();
   2829 }
   2830 
   2831 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
   2832   SDValue N0 = N->getOperand(0);
   2833   SDValue N1 = N->getOperand(1);
   2834   SDValue CarryIn = N->getOperand(2);
   2835 
   2836   // fold (subcarry x, y, false) -> (usubo x, y)
   2837   if (isNullConstant(CarryIn)) {
   2838     if (!LegalOperations ||
   2839         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
   2840       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
   2841   }
   2842 
   2843   return SDValue();
   2844 }
   2845 
   2846 SDValue DAGCombiner::visitMUL(SDNode *N) {
   2847   SDValue N0 = N->getOperand(0);
   2848   SDValue N1 = N->getOperand(1);
   2849   EVT VT = N0.getValueType();
   2850 
   2851   // fold (mul x, undef) -> 0
   2852   if (N0.isUndef() || N1.isUndef())
   2853     return DAG.getConstant(0, SDLoc(N), VT);
   2854 
   2855   bool N0IsConst = false;
   2856   bool N1IsConst = false;
   2857   bool N1IsOpaqueConst = false;
   2858   bool N0IsOpaqueConst = false;
   2859   APInt ConstValue0, ConstValue1;
   2860   // fold vector ops
   2861   if (VT.isVector()) {
   2862     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   2863       return FoldedVOp;
   2864 
   2865     N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
   2866     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
   2867     assert((!N0IsConst ||
   2868             ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
   2869            "Splat APInt should be element width");
   2870     assert((!N1IsConst ||
   2871             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
   2872            "Splat APInt should be element width");
   2873   } else {
   2874     N0IsConst = isa<ConstantSDNode>(N0);
   2875     if (N0IsConst) {
   2876       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
   2877       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
   2878     }
   2879     N1IsConst = isa<ConstantSDNode>(N1);
   2880     if (N1IsConst) {
   2881       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
   2882       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
   2883     }
   2884   }
   2885 
   2886   // fold (mul c1, c2) -> c1*c2
   2887   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
   2888     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
   2889                                       N0.getNode(), N1.getNode());
   2890 
   2891   // canonicalize constant to RHS (a vector constant need not be a splat)
   2892   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
   2893      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
   2894     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
   2895   // fold (mul x, 0) -> 0
   2896   if (N1IsConst && ConstValue1.isNullValue())
   2897     return N1;
   2898   // fold (mul x, 1) -> x
   2899   if (N1IsConst && ConstValue1.isOneValue())
   2900     return N0;
   2901 
   2902   if (SDValue NewSel = foldBinOpIntoSelect(N))
   2903     return NewSel;
   2904 
   2905   // fold (mul x, -1) -> 0-x
   2906   if (N1IsConst && ConstValue1.isAllOnesValue()) {
   2907     SDLoc DL(N);
   2908     return DAG.getNode(ISD::SUB, DL, VT,
   2909                        DAG.getConstant(0, DL, VT), N0);
   2910   }
   2911   // fold (mul x, (1 << c)) -> x << c
   2912   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
   2913       DAG.isKnownToBeAPowerOfTwo(N1) &&
   2914       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
   2915     SDLoc DL(N);
   2916     SDValue LogBase2 = BuildLogBase2(N1, DL);
   2917     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
   2918     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
   2919     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
   2920   }
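          // Illustration: mul X, 16 becomes X << 4; BuildLogBase2 extracts
          // the exponent from the (possibly splatted) constant, and the
          // zext/trunc merely retypes it to the target's shift-amount type.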
   2921   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
   2922   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
   2923     unsigned Log2Val = (-ConstValue1).logBase2();
   2924     SDLoc DL(N);
   2925     // FIXME: If the input is something that is easily negated (e.g. a
   2926     // single-use add), we should put the negate there.
   2927     return DAG.getNode(ISD::SUB, DL, VT,
   2928                        DAG.getConstant(0, DL, VT),
   2929                        DAG.getNode(ISD::SHL, DL, VT, N0,
   2930                             DAG.getConstant(Log2Val, DL,
   2931                                       getShiftAmountTy(N0.getValueType()))));
   2932   }
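          // Illustration: mul X, -8 becomes 0 - (X << 3), using the identity
          // X * -(1 << c) == -(X << c) in two's complement arithmetic.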
   2933 
   2934   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
   2935   if (N0.getOpcode() == ISD::SHL &&
   2936       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
   2937       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
   2938     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
   2939     if (isConstantOrConstantVector(C3))
   2940       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
   2941   }
   2942 
   2943   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
   2944   // use.
   2945   {
   2946     SDValue Sh(nullptr, 0), Y(nullptr, 0);
   2947 
   2948     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
   2949     if (N0.getOpcode() == ISD::SHL &&
   2950         isConstantOrConstantVector(N0.getOperand(1)) &&
   2951         N0.getNode()->hasOneUse()) {
   2952       Sh = N0; Y = N1;
   2953     } else if (N1.getOpcode() == ISD::SHL &&
   2954                isConstantOrConstantVector(N1.getOperand(1)) &&
   2955                N1.getNode()->hasOneUse()) {
   2956       Sh = N1; Y = N0;
   2957     }
   2958 
   2959     if (Sh.getNode()) {
   2960       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
   2961       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
   2962     }
   2963   }
   2964 
   2965   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
   2966   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
   2967       N0.getOpcode() == ISD::ADD &&
   2968       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
   2969       isMulAddWithConstProfitable(N, N0, N1))
   2970       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
   2971                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
   2972                                      N0.getOperand(0), N1),
   2973                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
   2974                                      N0.getOperand(1), N1));
   2975 
   2976   // reassociate mul
   2977   if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
   2978     return RMUL;
   2979 
   2980   return SDValue();
   2981 }
   2982 
   2983 /// Return true if divmod libcall is available.
   2984 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
   2985                                      const TargetLowering &TLI) {
   2986   RTLIB::Libcall LC;
   2987   EVT NodeType = Node->getValueType(0);
   2988   if (!NodeType.isSimple())
   2989     return false;
   2990   switch (NodeType.getSimpleVT().SimpleTy) {
   2991   default: return false; // No libcall for vector types.
   2992   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
   2993   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
   2994   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
   2995   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
   2996   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
   2997   }
   2998 
   2999   return TLI.getLibcallName(LC) != nullptr;
   3000 }
   3001 
   3002 /// Issue divrem if both quotient and remainder are needed.
   3003 SDValue DAGCombiner::useDivRem(SDNode *Node) {
   3004   if (Node->use_empty())
   3005     return SDValue(); // This is a dead node, leave it alone.
   3006 
   3007   unsigned Opcode = Node->getOpcode();
   3008   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
   3009   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
   3010 
   3011   // DivRem libcalls can still work on types that are not legal for the target.
   3012   EVT VT = Node->getValueType(0);
   3013   if (VT.isVector() || !VT.isInteger())
   3014     return SDValue();
   3015 
   3016   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
   3017     return SDValue();
   3018 
   3019   // If DIVREM is going to get expanded into a libcall,
   3020   // but there is no libcall available, then don't combine.
   3021   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
   3022       !isDivRemLibcallAvailable(Node, isSigned, TLI))
   3023     return SDValue();
   3024 
   3025   // If div is legal, it's better to do the normal expansion.
   3026   unsigned OtherOpcode = 0;
   3027   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
   3028     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
   3029     if (TLI.isOperationLegalOrCustom(Opcode, VT))
   3030       return SDValue();
   3031   } else {
   3032     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
   3033     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
   3034       return SDValue();
   3035   }
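          // Illustration: if the DAG contains both (sdiv a, b) and
          // (srem a, b), the loop below rewrites each of them to the
          // corresponding result of a single (sdivrem a, b) node, so the
          // division is computed only once.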
   3036 
   3037   SDValue Op0 = Node->getOperand(0);
   3038   SDValue Op1 = Node->getOperand(1);
   3039   SDValue combined;
   3040   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
   3041          UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
   3042     SDNode *User = *UI;
   3043     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
   3044         User->use_empty())
   3045       continue;
   3046     // Convert the other matching node(s), too;
   3047     // otherwise, the DIVREM may get target-legalized into something
   3048     // target-specific that we won't be able to recognize.
   3049     unsigned UserOpc = User->getOpcode();
   3050     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
   3051         User->getOperand(0) == Op0 &&
   3052         User->getOperand(1) == Op1) {
   3053       if (!combined) {
   3054         if (UserOpc == OtherOpcode) {
   3055           SDVTList VTs = DAG.getVTList(VT, VT);
   3056           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
   3057         } else if (UserOpc == DivRemOpc) {
   3058           combined = SDValue(User, 0);
   3059         } else {
   3060           assert(UserOpc == Opcode);
   3061           continue;
   3062         }
   3063       }
   3064       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
   3065         CombineTo(User, combined);
   3066       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
   3067         CombineTo(User, combined.getValue(1));
   3068     }
   3069   }
   3070   return combined;
   3071 }
   3072 
   3073 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
   3074   SDValue N0 = N->getOperand(0);
   3075   SDValue N1 = N->getOperand(1);
   3076   EVT VT = N->getValueType(0);
   3077   SDLoc DL(N);
   3078 
   3079   if (DAG.isUndef(N->getOpcode(), {N0, N1}))
   3080     return DAG.getUNDEF(VT);
   3081 
   3082   // undef / X -> 0
   3083   // undef % X -> 0
   3084   if (N0.isUndef())
   3085     return DAG.getConstant(0, DL, VT);
   3086 
   3087   return SDValue();
   3088 }
   3089 
   3090 SDValue DAGCombiner::visitSDIV(SDNode *N) {
   3091   SDValue N0 = N->getOperand(0);
   3092   SDValue N1 = N->getOperand(1);
   3093   EVT VT = N->getValueType(0);
   3094   EVT CCVT = getSetCCResultType(VT);
   3095 
   3096   // fold vector ops
   3097   if (VT.isVector())
   3098     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   3099       return FoldedVOp;
   3100 
   3101   SDLoc DL(N);
   3102 
   3103   // fold (sdiv c1, c2) -> c1/c2
   3104   ConstantSDNode *N0C = isConstOrConstSplat(N0);
   3105   ConstantSDNode *N1C = isConstOrConstSplat(N1);
   3106   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
   3107     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
   3108   // fold (sdiv X, 1) -> X
   3109   if (N1C && N1C->isOne())
   3110     return N0;
   3111   // fold (sdiv X, -1) -> 0-X
   3112   if (N1C && N1C->isAllOnesValue())
   3113     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
   3114   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
   3115   if (N1C && N1C->getAPIntValue().isMinSignedValue())
   3116     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
   3117                          DAG.getConstant(1, DL, VT),
   3118                          DAG.getConstant(0, DL, VT));
   3119 
   3120   if (SDValue V = simplifyDivRem(N, DAG))
   3121     return V;
   3122 
   3123   if (SDValue NewSel = foldBinOpIntoSelect(N))
   3124     return NewSel;
   3125 
   3126   // If we know the sign bits of both operands are zero, strength reduce to a
   3127   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
   3128   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
   3129     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
   3130 
   3131   if (SDValue V = visitSDIVLike(N0, N1, N))
   3132     return V;
   3133 
   3134   // sdiv, srem -> sdivrem
   3135   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
   3136   // true.  Otherwise, we break the simplification logic in visitREM().
   3137   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
   3138   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
   3139     if (SDValue DivRem = useDivRem(N))
   3140       return DivRem;
   3141 
   3142   return SDValue();
   3143 }
   3144 
   3145 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
   3146   SDLoc DL(N);
   3147   EVT VT = N->getValueType(0);
   3148   EVT CCVT = getSetCCResultType(VT);
   3149   unsigned BitWidth = VT.getScalarSizeInBits();
   3150 
   3151   ConstantSDNode *N1C = isConstOrConstSplat(N1);
   3152 
   3153   // Helper for determining whether a value is a power-of-2 constant scalar,
   3154   // the negation of one, or a vector of such elements.
   3155   auto IsPowerOfTwo = [](ConstantSDNode *C) {
   3156     if (C->isNullValue() || C->isOpaque())
   3157       return false;
   3158     if (C->getAPIntValue().isPowerOf2())
   3159       return true;
   3160     if ((-C->getAPIntValue()).isPowerOf2())
   3161       return true;
   3162     return false;
   3163   };
   3164 
   3165   // fold (sdiv X, pow2) -> simple ops after legalize
   3166   // FIXME: We check for the exact bit here because the generic lowering gives
   3167   // better results in that case. The target-specific lowering should learn how
   3168   // to handle exact sdivs efficiently.
   3169   if (!N->getFlags().hasExact() &&
   3170       ISD::matchUnaryPredicate(N1C ? SDValue(N1C, 0) : N1, IsPowerOfTwo)) {
   3171     // Target-specific implementation of sdiv x, pow2.
   3172     if (SDValue Res = BuildSDIVPow2(N))
   3173       return Res;
   3174 
   3175     // Create constants that are functions of the shift amount value.
   3176     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
   3177     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
   3178     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
   3179     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
   3180     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
   3181     if (!isConstantOrConstantVector(Inexact))
   3182       return SDValue();
   3183 
   3184     // Splat the sign bit into the register
   3185     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
   3186                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
   3187     AddToWorklist(Sign.getNode());
   3188 
   3189     // Add (N0 < 0) ? abs(N1) - 1 : 0 so the arithmetic shift rounds toward zero.
   3190     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
   3191     AddToWorklist(Srl.getNode());
   3192     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
   3193     AddToWorklist(Add.getNode());
   3194     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
   3195     AddToWorklist(Sra.getNode());
   3196 
   3197     // Special case: (sdiv X, 1) -> X
   3198     // Special case: (sdiv X, -1) -> 0-X
   3199     SDValue One = DAG.getConstant(1, DL, VT);
   3200     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
   3201     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
   3202     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
   3203     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
   3204     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
   3205 
   3206     // If dividing by a positive value, we're done. Otherwise, the result must
   3207     // be negated.
   3208     SDValue Zero = DAG.getConstant(0, DL, VT);
   3209     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
   3210 
   3211     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
   3212     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
   3213     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
   3214     return Res;
   3215   }
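          // Illustration for i32 with N1 == 8: C1 = 3 and Inexact = 29, so
          // Srl is 7 for negative N0 and 0 otherwise; Add biases negative
          // dividends by abs(N1) - 1, and the final Sra by 3 then rounds
          // toward zero as sdiv requires. The selects above only patch up the
          // divisor == 1, divisor == -1, and negative power-of-two cases.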
   3216 
   3217   // If integer divide is expensive and we satisfy the requirements, emit an
   3218   // alternate sequence.  Targets may check function attributes for size/speed
   3219   // trade-offs.
   3220   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
   3221   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
   3222     if (SDValue Op = BuildSDIV(N))
   3223       return Op;
   3224 
   3225   return SDValue();
   3226 }
   3227 
   3228 SDValue DAGCombiner::visitUDIV(SDNode *N) {
   3229   SDValue N0 = N->getOperand(0);
   3230   SDValue N1 = N->getOperand(1);
   3231   EVT VT = N->getValueType(0);
   3232   EVT CCVT = getSetCCResultType(VT);
   3233 
   3234   // fold vector ops
   3235   if (VT.isVector())
   3236     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   3237       return FoldedVOp;
   3238 
   3239   SDLoc DL(N);
   3240 
   3241   // fold (udiv c1, c2) -> c1/c2
   3242   ConstantSDNode *N0C = isConstOrConstSplat(N0);
   3243   ConstantSDNode *N1C = isConstOrConstSplat(N1);
   3244   if (N0C && N1C)
   3245     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
   3246                                                     N0C, N1C))
   3247       return Folded;
   3248   // fold (udiv X, 1) -> X
   3249   if (N1C && N1C->isOne())
   3250     return N0;
   3251   // fold (udiv X, -1) -> select(X == -1, 1, 0)
   3252   if (N1C && N1C->getAPIntValue().isAllOnesValue())
   3253     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
   3254                          DAG.getConstant(1, DL, VT),
   3255                          DAG.getConstant(0, DL, VT));
   3256 
   3257   if (SDValue V = simplifyDivRem(N, DAG))
   3258     return V;
   3259 
   3260   if (SDValue NewSel = foldBinOpIntoSelect(N))
   3261     return NewSel;
   3262 
   3263   if (SDValue V = visitUDIVLike(N0, N1, N))
   3264     return V;
   3265 
   3266   // udiv, urem -> udivrem
   3267   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
   3268   // true.  Otherwise, we break the simplification logic in visitREM().
   3269   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
   3270   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
   3271     if (SDValue DivRem = useDivRem(N))
   3272       return DivRem;
   3273 
   3274   return SDValue();
   3275 }
   3276 
   3277 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
   3278   SDLoc DL(N);
   3279   EVT VT = N->getValueType(0);
   3280 
   3281   ConstantSDNode *N1C = isConstOrConstSplat(N1);
   3282 
   3283   // fold (udiv x, (1 << c)) -> x >>u c
   3284   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
   3285       DAG.isKnownToBeAPowerOfTwo(N1)) {
   3286     SDValue LogBase2 = BuildLogBase2(N1, DL);
   3287     AddToWorklist(LogBase2.getNode());
   3288 
   3289     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
   3290     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
   3291     AddToWorklist(Trunc.getNode());
   3292     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
   3293   }
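          // Illustration: udiv X, 32 becomes X >>u 5, with the 5 produced by
          // BuildLogBase2 and retyped to the shift-amount type.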
   3294 
   3295   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
   3296   if (N1.getOpcode() == ISD::SHL) {
   3297     SDValue N10 = N1.getOperand(0);
   3298     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
   3299         DAG.isKnownToBeAPowerOfTwo(N10)) {
   3300       SDValue LogBase2 = BuildLogBase2(N10, DL);
   3301       AddToWorklist(LogBase2.getNode());
   3302 
   3303       EVT ADDVT = N1.getOperand(1).getValueType();
   3304       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
   3305       AddToWorklist(Trunc.getNode());
   3306       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
   3307       AddToWorklist(Add.getNode());
   3308       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
   3309     }
   3310   }
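          // Illustration: udiv X, (shl 4, Y) becomes X >>u (2 + Y); shifting
          // the power-of-two divisor left by Y just adds Y to its exponent.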
   3311 
   3312   // fold (udiv x, c) -> alternate
   3313   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
   3314   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
   3315     if (SDValue Op = BuildUDIV(N))
   3316       return Op;
   3317 
   3318   return SDValue();
   3319 }
   3320 
   3321 // Handles ISD::SREM and ISD::UREM.
   3322 SDValue DAGCombiner::visitREM(SDNode *N) {
   3323   unsigned Opcode = N->getOpcode();
   3324   SDValue N0 = N->getOperand(0);
   3325   SDValue N1 = N->getOperand(1);
   3326   EVT VT = N->getValueType(0);
   3327   EVT CCVT = getSetCCResultType(VT);
   3328 
   3329   bool isSigned = (Opcode == ISD::SREM);
   3330   SDLoc DL(N);
   3331 
   3332   // fold (rem c1, c2) -> c1%c2
   3333   ConstantSDNode *N0C = isConstOrConstSplat(N0);
   3334   ConstantSDNode *N1C = isConstOrConstSplat(N1);
   3335   if (N0C && N1C)
   3336     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
   3337       return Folded;
   3338   // fold (urem X, -1) -> select(X == -1, 0, X)
   3339   if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
   3340     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
   3341                          DAG.getConstant(0, DL, VT), N0);
   3342 
   3343   if (SDValue V = simplifyDivRem(N, DAG))
   3344     return V;
   3345 
   3346   if (SDValue NewSel = foldBinOpIntoSelect(N))
   3347     return NewSel;
   3348 
   3349   if (isSigned) {
   3350     // If we know the sign bits of both operands are zero, strength reduce to a
   3351     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
   3352     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
   3353       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
   3354   } else {
   3355     SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
   3356     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
   3357       // fold (urem x, pow2) -> (and x, pow2-1)
   3358       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
   3359       AddToWorklist(Add.getNode());
   3360       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
   3361     }
   3362     if (N1.getOpcode() == ISD::SHL &&
   3363         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
   3364       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
   3365       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
   3366       AddToWorklist(Add.getNode());
   3367       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
   3368     }
   3369   }
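          // Illustration: urem X, 16 becomes X & 15, and urem X, (shl 16, Y)
          // becomes X & ((16 << Y) - 1); both rely on the divisor being a
          // power of two so that divisor - 1 is a contiguous low-bit mask.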
   3370 
   3371   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
   3372 
   3373   // If X/C can be simplified by the division-by-constant logic, lower
   3374   // X%C to the equivalent of X-X/C*C.
   3375   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
   3376   // speculative DIV must not cause a DIVREM conversion.  We guard against this
   3377   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
   3378   // combine will not return a DIVREM.  Regardless, checking cheapness here
   3379   // makes sense since the simplification results in fatter code.
   3380   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
   3381     SDValue OptimizedDiv =
   3382         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
   3383     if (OptimizedDiv.getNode() && OptimizedDiv.getOpcode() != ISD::UDIVREM &&
   3384         OptimizedDiv.getOpcode() != ISD::SDIVREM) {
   3385       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
   3386       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
   3387       AddToWorklist(OptimizedDiv.getNode());
   3388       AddToWorklist(Mul.getNode());
   3389       return Sub;
   3390     }
   3391   }
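          // This block uses the identity X % C == X - (X / C) * C for nonzero
          // C: whenever the division folds to cheap ops (e.g. shifts for a
          // power-of-two divisor), the remainder inherits that cost plus one
          // MUL and one SUB.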
   3392 
   3393   // sdiv, srem -> sdivrem; udiv, urem -> udivrem
   3394   if (SDValue DivRem = useDivRem(N))
   3395     return DivRem.getValue(1);
   3396 
   3397   return SDValue();
   3398 }
   3399 
   3400 SDValue DAGCombiner::visitMULHS(SDNode *N) {
   3401   SDValue N0 = N->getOperand(0);
   3402   SDValue N1 = N->getOperand(1);
   3403   EVT VT = N->getValueType(0);
   3404   SDLoc DL(N);
   3405 
   3406   if (VT.isVector()) {
   3407     // fold (mulhs x, 0) -> 0
   3408     if (ISD::isBuildVectorAllZeros(N1.getNode()))
   3409       return N1;
   3410     if (ISD::isBuildVectorAllZeros(N0.getNode()))
   3411       return N0;
   3412   }
   3413 
   3414   // fold (mulhs x, 0) -> 0
   3415   if (isNullConstant(N1))
   3416     return N1;
   3417   // fold (mulhs x, 1) -> (sra x, size(x)-1)
   3418   if (isOneConstant(N1))
   3419     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
   3420                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
   3421                                        getShiftAmountTy(N0.getValueType())));
   3422 
   3423   // fold (mulhs x, undef) -> 0
   3424   if (N0.isUndef() || N1.isUndef())
   3425     return DAG.getConstant(0, DL, VT);
   3426 
   3427   // If the type twice as wide is legal, transform the mulhs to a wider multiply
   3428   // plus a shift.
   3429   if (VT.isSimple() && !VT.isVector()) {
   3430     MVT Simple = VT.getSimpleVT();
   3431     unsigned SimpleSize = Simple.getSizeInBits();
   3432     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
   3433     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
   3434       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
   3435       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
   3436       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
   3437       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
   3438             DAG.getConstant(SimpleSize, DL,
   3439                             getShiftAmountTy(N1.getValueType())));
   3440       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
   3441     }
   3442   }
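          // Illustration: for an i16 mulhs with a legal i32 multiply, both
          // operands are sign-extended to i32, the full 32-bit product is
          // shifted right by 16, and the truncate keeps exactly the high half
          // of the 16 x 16 product.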
   3443 
   3444   return SDValue();
   3445 }
   3446 
   3447 SDValue DAGCombiner::visitMULHU(SDNode *N) {
   3448   SDValue N0 = N->getOperand(0);
   3449   SDValue N1 = N->getOperand(1);
   3450   EVT VT = N->getValueType(0);
   3451   SDLoc DL(N);
   3452 
   3453   if (VT.isVector()) {
   3454     // fold (mulhu x, 0) -> 0
   3455     if (ISD::isBuildVectorAllZeros(N1.getNode()))
   3456       return N1;
   3457     if (ISD::isBuildVectorAllZeros(N0.getNode()))
   3458       return N0;
   3459   }
   3460 
   3461   // fold (mulhu x, 0) -> 0
   3462   if (isNullConstant(N1))
   3463     return N1;
   3464   // fold (mulhu x, 1) -> 0
   3465   if (isOneConstant(N1))
   3466     return DAG.getConstant(0, DL, N0.getValueType());
   3467   // fold (mulhu x, undef) -> 0
   3468   if (N0.isUndef() || N1.isUndef())
   3469     return DAG.getConstant(0, DL, VT);
   3470 
   3471   // If the type twice as wide is legal, transform the mulhu to a wider multiply
   3472   // plus a shift.
   3473   if (VT.isSimple() && !VT.isVector()) {
   3474     MVT Simple = VT.getSimpleVT();
   3475     unsigned SimpleSize = Simple.getSizeInBits();
   3476     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
   3477     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
   3478       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
   3479       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
   3480       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
   3481       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
   3482             DAG.getConstant(SimpleSize, DL,
   3483                             getShiftAmountTy(N1.getValueType())));
   3484       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
   3485     }
   3486   }
   3487 
   3488   return SDValue();
   3489 }
   3490 
   3491 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
   3492 /// give the opcodes for the two computations that are being performed. Return
   3493 /// the combined value if a simplification was made, or a null SDValue.
   3494 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
   3495                                                 unsigned HiOp) {
   3496   // If the high half is not needed, just compute the low half.
   3497   bool HiExists = N->hasAnyUseOfValue(1);
   3498   if (!HiExists &&
   3499       (!LegalOperations ||
   3500        TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
   3501     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
   3502     return CombineTo(N, Res, Res);
   3503   }
   3504 
   3505   // If the low half is not needed, just compute the high half.
   3506   bool LoExists = N->hasAnyUseOfValue(0);
   3507   if (!LoExists &&
   3508       (!LegalOperations ||
   3509        TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
   3510     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
   3511     return CombineTo(N, Res, Res);
   3512   }
   3513 
   3514   // If both halves are used, leave the node as-is.
   3515   if (LoExists && HiExists)
   3516     return SDValue();
   3517 
   3518   // If the two computed results can be simplified separately, separate them.
   3519   if (LoExists) {
   3520     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
   3521     AddToWorklist(Lo.getNode());
   3522     SDValue LoOpt = combine(Lo.getNode());
   3523     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
   3524         (!LegalOperations ||
   3525          TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
   3526       return CombineTo(N, LoOpt, LoOpt);
   3527   }
   3528 
   3529   if (HiExists) {
   3530     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
   3531     AddToWorklist(Hi.getNode());
   3532     SDValue HiOpt = combine(Hi.getNode());
   3533     if (HiOpt.getNode() && HiOpt != Hi &&
   3534         (!LegalOperations ||
   3535          TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
   3536       return CombineTo(N, HiOpt, HiOpt);
   3537   }
   3538 
   3539   return SDValue();
   3540 }
   3541 
   3542 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
   3543   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
   3544     return Res;
   3545 
   3546   EVT VT = N->getValueType(0);
   3547   SDLoc DL(N);
   3548 
   3549   // If the type twice as wide is legal, transform the multiply to a wider
   3550   // multiply plus a shift.
   3551   if (VT.isSimple() && !VT.isVector()) {
   3552     MVT Simple = VT.getSimpleVT();
   3553     unsigned SimpleSize = Simple.getSizeInBits();
   3554     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
   3555     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
   3556       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
   3557       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
   3558       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
   3559       // Compute the high part as N1.
   3560       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
   3561             DAG.getConstant(SimpleSize, DL,
   3562                             getShiftAmountTy(Lo.getValueType())));
   3563       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
   3564       // Compute the low part as N0.
   3565       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
   3566       return CombineTo(N, Lo, Hi);
   3567     }
   3568   }
   3569 
   3570   return SDValue();
   3571 }
   3572 
   3573 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
   3574   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
   3575     return Res;
   3576 
   3577   EVT VT = N->getValueType(0);
   3578   SDLoc DL(N);
   3579 
   3580   // If the type twice as wide is legal, transform the multiply to a wider
   3581   // multiply plus a shift.
   3582   if (VT.isSimple() && !VT.isVector()) {
   3583     MVT Simple = VT.getSimpleVT();
   3584     unsigned SimpleSize = Simple.getSizeInBits();
   3585     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
   3586     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
   3587       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
   3588       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
   3589       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
   3590       // Compute the high part as N1.
   3591       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
   3592             DAG.getConstant(SimpleSize, DL,
   3593                             getShiftAmountTy(Lo.getValueType())));
   3594       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
   3595       // Compute the low part as N0.
   3596       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
   3597       return CombineTo(N, Lo, Hi);
   3598     }
   3599   }
   3600 
   3601   return SDValue();
   3602 }
   3603 
   3604 SDValue DAGCombiner::visitSMULO(SDNode *N) {
   3605   // (smulo x, 2) -> (saddo x, x)
   3606   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
   3607     if (C2->getAPIntValue() == 2)
   3608       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
   3609                          N->getOperand(0), N->getOperand(0));
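          // This is exact: X * 2 and X + X produce the same value and
          // overflow under the same conditions, so the overflow flag carries
          // over unchanged.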
   3610 
   3611   return SDValue();
   3612 }
   3613 
   3614 SDValue DAGCombiner::visitUMULO(SDNode *N) {
   3615   // (umulo x, 2) -> (uaddo x, x)
   3616   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
   3617     if (C2->getAPIntValue() == 2)
   3618       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
   3619                          N->getOperand(0), N->getOperand(0));
   3620 
   3621   return SDValue();
   3622 }
   3623 
   3624 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
   3625   SDValue N0 = N->getOperand(0);
   3626   SDValue N1 = N->getOperand(1);
   3627   EVT VT = N0.getValueType();
   3628 
   3629   // fold vector ops
   3630   if (VT.isVector())
   3631     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   3632       return FoldedVOp;
   3633 
   3634   // fold operation with constant operands.
   3635   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   3636   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
   3637   if (N0C && N1C)
   3638     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
   3639 
   3640   // canonicalize constant to RHS
   3641   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
   3642      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
   3643     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
   3644 
   3645   // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
   3646   // Only do this if the current op isn't legal and the flipped one is.
   3647   unsigned Opcode = N->getOpcode();
   3648   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   3649   if (!TLI.isOperationLegal(Opcode, VT) &&
   3650       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
   3651       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
   3652     unsigned AltOpcode;
   3653     switch (Opcode) {
   3654     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
   3655     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
   3656     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
   3657     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
   3658     default: llvm_unreachable("Unknown MINMAX opcode");
   3659     }
   3660     if (TLI.isOperationLegal(AltOpcode, VT))
   3661       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
   3662   }
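          // Illustration: when both inputs are known non-negative, smax(X, Y)
          // and umax(X, Y) agree on every input pair, so the flip is free
          // whenever only the flipped form is legal.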
   3663 
   3664   return SDValue();
   3665 }
   3666 
   3667 /// If this is a binary operator with two operands of the same opcode, try to
   3668 /// simplify it.
   3669 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
   3670   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
   3671   EVT VT = N0.getValueType();
   3672   assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
   3673 
   3674   // Bail early if none of these transforms apply.
   3675   if (N0.getNumOperands() == 0) return SDValue();
   3676 
   3677   // For each of OP in AND/OR/XOR:
   3678   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
   3679   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
   3680   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
   3681   // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
   3682   // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
   3683   //
   3684   // Do not sink a logical op inside of a vector extend, since it may combine
   3685   // into a vsetcc.
   3686   EVT Op0VT = N0.getOperand(0).getValueType();
   3687   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
   3688        N0.getOpcode() == ISD::SIGN_EXTEND ||
   3689        N0.getOpcode() == ISD::BSWAP ||
   3690        // Avoid infinite looping with PromoteIntBinOp.
   3691        (N0.getOpcode() == ISD::ANY_EXTEND &&
   3692         (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
   3693        (N0.getOpcode() == ISD::TRUNCATE &&
   3694         (!TLI.isZExtFree(VT, Op0VT) ||
   3695          !TLI.isTruncateFree(Op0VT, VT)) &&
   3696         TLI.isTypeLegal(Op0VT))) &&
   3697       !VT.isVector() &&
   3698       Op0VT == N1.getOperand(0).getValueType() &&
   3699       (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
   3700     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
   3701                                  N0.getOperand(0).getValueType(),
   3702                                  N0.getOperand(0), N1.getOperand(0));
   3703     AddToWorklist(ORNode.getNode());
   3704     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
   3705   }
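          // Illustration: (and (zext x), (zext y)) becomes (zext (and x, y));
          // the logic op runs in the narrower type and only one extend is
          // emitted. This is safe because zext/sext/aext/trunc/bswap all
          // commute with bitwise logic.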
   3706 
   3707   // For each of OP in SHL/SRL/SRA/AND...
   3708   //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
   3709   //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
   3710   //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
   3711   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
   3712        N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
   3713       N0.getOperand(1) == N1.getOperand(1)) {
   3714     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
   3715                                  N0.getOperand(0).getValueType(),
   3716                                  N0.getOperand(0), N1.getOperand(0));
   3717     AddToWorklist(ORNode.getNode());
   3718     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
   3719                        ORNode, N0.getOperand(1));
   3720   }
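          // Illustration: (and (srl x, z), (srl y, z)) becomes
          // (srl (and x, y), z), saving one shift; a common shift (or a
          // common AND mask) on both operands commutes with the outer
          // bitwise op.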
   3721 
   3722   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
   3723   // Only perform this optimization up to and including type legalization,
   3724   // before LegalizeVectorOps. LegalizeVectorOps promotes vector operations
   3725   // by adding bitcasts. For example, (xor v4i32) is promoted to (v2i64),
   3726   // and we don't want to undo this promotion.
   3727   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
   3728   // on scalars.
   3729   if ((N0.getOpcode() == ISD::BITCAST ||
   3730        N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
   3731        Level <= AfterLegalizeTypes) {
   3732     SDValue In0 = N0.getOperand(0);
   3733     SDValue In1 = N1.getOperand(0);
   3734     EVT In0Ty = In0.getValueType();
   3735     EVT In1Ty = In1.getValueType();
   3736     SDLoc DL(N);
   3737     // If both incoming values are integers, and the original types are the
   3738     // same.
   3739     if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
   3740       SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
   3741       SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
   3742       AddToWorklist(Op.getNode());
   3743       return BC;
   3744     }
   3745   }
   3746 
   3747   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
   3748   // Simplify xor/and/or (shuf(A), shuf(B)) -> shuf(op (A,B))
   3749   // If both shuffles use the same mask, and both shuffle within a single
   3750   // vector, then it is worthwhile to move the swizzle after the operation.
   3751   // The type-legalizer generates this pattern when loading illegal
   3752   // vector types from memory. In many cases this allows additional shuffle
   3753   // optimizations.
   3754   // There are other cases where moving the shuffle after the xor/and/or
   3755   // is profitable even if shuffles don't perform a swizzle.
   3756   // If both shuffles use the same mask, and both shuffles have the same first
   3757   // or second operand, then it might still be profitable to move the shuffle
   3758   // after the xor/and/or operation.
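          // For example, with a swizzle mask <0,0,3,3> used on both operands:
          //   (and (shuf A, undef, <0,0,3,3>), (shuf B, undef, <0,0,3,3>))
          //     --> (shuf (and A, B), undef, <0,0,3,3>)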
   3759   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
   3760     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
   3761     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
   3762 
   3763     assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
   3764            "Inputs to shuffles are not the same type");
   3765 
   3766     // Check that both shuffles use the same mask. The masks are known to be of
   3767     // the same length because the result vector type is the same.
   3768     // Check also that shuffles have only one use to avoid introducing extra
   3769     // instructions.
   3770     if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
   3771         SVN0->getMask().equals(SVN1->getMask())) {
   3772       SDValue ShOp = N0->getOperand(1);
   3773 
   3774       // Don't try to fold this node if it requires introducing a
   3775       // build vector of all zeros that might be illegal at this stage.
   3776       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
   3777         if (!LegalTypes)
   3778           ShOp = DAG.getConstant(0, SDLoc(N), VT);
   3779         else
   3780           ShOp = SDValue();
   3781       }
   3782 
   3783       // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
   3784       // (OR  (shuf (A, C), shuf (B, C))) -> shuf (OR  (A, B), C)
   3785       // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)
   3786       if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
   3787         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
   3788                                       N0->getOperand(0), N1->getOperand(0));
   3789         AddToWorklist(NewNode.getNode());
   3790         return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
   3791                                     SVN0->getMask());
   3792       }
   3793 
   3794       // Don't try to fold this node if it requires introducing a
   3795       // build vector of all zeros that might be illegal at this stage.
   3796       ShOp = N0->getOperand(0);
   3797       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
   3798         if (!LegalTypes)
   3799           ShOp = DAG.getConstant(0, SDLoc(N), VT);
   3800         else
   3801           ShOp = SDValue();
   3802       }
   3803 
   3804       // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
   3805       // (OR  (shuf (C, A), shuf (C, B))) -> shuf (C, OR  (A, B))
   3806       // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
   3807       if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
   3808         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
   3809                                       N0->getOperand(1), N1->getOperand(1));
   3810         AddToWorklist(NewNode.getNode());
   3811         return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
   3812                                     SVN0->getMask());
   3813       }
   3814     }
   3815   }
   3816 
   3817   return SDValue();
   3818 }
   3819 
   3820 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
   3821 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
   3822                                        const SDLoc &DL) {
   3823   SDValue LL, LR, RL, RR, N0CC, N1CC;
   3824   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
   3825       !isSetCCEquivalent(N1, RL, RR, N1CC))
   3826     return SDValue();
   3827 
   3828   assert(N0.getValueType() == N1.getValueType() &&
   3829          "Unexpected operand types for bitwise logic op");
   3830   assert(LL.getValueType() == LR.getValueType() &&
   3831          RL.getValueType() == RR.getValueType() &&
   3832          "Unexpected operand types for setcc");
   3833 
   3834   // If we're here post-legalization or the logic op type is not i1, the logic
   3835   // op type must match a setcc result type. Also, all folds require new
   3836   // operations on the left and right operands, so those types must match.
   3837   EVT VT = N0.getValueType();
   3838   EVT OpVT = LL.getValueType();
   3839   if (LegalOperations || VT.getScalarType() != MVT::i1)
   3840     if (VT != getSetCCResultType(OpVT))
   3841       return SDValue();
   3842   if (OpVT != RL.getValueType())
   3843     return SDValue();
   3844 
   3845   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
   3846   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
   3847   bool IsInteger = OpVT.isInteger();
   3848   if (LR == RR && CC0 == CC1 && IsInteger) {
   3849     bool IsZero = isNullConstantOrNullSplatConstant(LR);
   3850     bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
   3851 
   3852     // All bits clear?
   3853     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
   3854     // All sign bits clear?
   3855     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
   3856     // Any bits set?
   3857     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
   3858     // Any sign bits set?
   3859     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
   3860 
   3861     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
   3862     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
   3863     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
   3864     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
   3865     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
   3866       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
   3867       AddToWorklist(Or.getNode());
   3868       return DAG.getSetCC(DL, VT, Or, LR, CC1);
   3869     }
   3870 
   3871     // All bits set?
   3872     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
   3873     // All sign bits set?
   3874     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
   3875     // Any bits clear?
   3876     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
   3877     // Any sign bits clear?
   3878     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
   3879 
   3880     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
   3881     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
   3882     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
   3883     // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
   3884     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
   3885       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
   3886       AddToWorklist(And.getNode());
   3887       return DAG.getSetCC(DL, VT, And, LR, CC1);
   3888     }
   3889   }
   3890 
   3891   // TODO: What is the 'or' equivalent of this fold?
   3892   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
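          // Rationale: X is neither 0 nor -1 exactly when X+1 is neither 1 nor
          // 0, i.e. (X+1) >=u 2; the constant 2 is why the type must be wider
          // than one bit.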
   3893   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
   3894       IsInteger && CC0 == ISD::SETNE &&
   3895       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
   3896        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
   3897     SDValue One = DAG.getConstant(1, DL, OpVT);
   3898     SDValue Two = DAG.getConstant(2, DL, OpVT);
   3899     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
   3900     AddToWorklist(Add.getNode());
   3901     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
   3902   }
   3903 
   3904   // Try more general transforms if the predicates match and the only user of
   3905   // the compares is the 'and' or 'or'.
   3906   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
   3907       N0.hasOneUse() && N1.hasOneUse()) {
   3908     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
   3909     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
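            // This works because A == B iff (A ^ B) == 0, and an OR of XORs is
            // zero iff every XOR is zero.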
   3910     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
   3911       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
   3912       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
   3913       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
   3914       SDValue Zero = DAG.getConstant(0, DL, OpVT);
   3915       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
   3916     }
   3917   }
   3918 
   3919   // Canonicalize equivalent operands to LL == RL.
   3920   if (LL == RR && LR == RL) {
   3921     CC1 = ISD::getSetCCSwappedOperands(CC1);
   3922     std::swap(RL, RR);
   3923   }
   3924 
   3925   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
   3926   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
   3927   if (LL == RL && LR == RR) {
   3928     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
   3929                                 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
   3930     if (NewCC != ISD::SETCC_INVALID &&
   3931         (!LegalOperations ||
   3932          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
   3933           TLI.isOperationLegal(ISD::SETCC, OpVT))))
   3934       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
   3935   }
   3936 
   3937   return SDValue();
   3938 }
   3939 
   3940 /// This contains all DAGCombine rules which reduce two values combined by
   3941 /// an And operation to a single value. This makes them reusable in the context
   3942 /// of visitSELECT(). Rules involving constants are not included as
   3943 /// visitSELECT() already handles those cases.
   3944 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
   3945   EVT VT = N1.getValueType();
   3946   SDLoc DL(N);
   3947 
   3948   // fold (and x, undef) -> 0
   3949   if (N0.isUndef() || N1.isUndef())
   3950     return DAG.getConstant(0, DL, VT);
   3951 
   3952   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
   3953     return V;
   3954 
   3955   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
   3956       VT.getSizeInBits() <= 64) {
   3957     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
   3958       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
   3959         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
   3960         // immediate for an add, but would be if its top c2 bits were set,
   3961         // transform the ADD so the immediate doesn't need to be materialized
   3962         // in a register.
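                // Illustrative (hypothetical target): with c2 == 48, the
                // (lshr y, 48) operand zeroes the top 48 bits of the AND
                // result, so the top 48 bits of c1 are don't-care; c1 = 0x1234
                // may become 0xFFFFFFFFFFFF1234 if that is cheaper to encode.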
   3963         APInt ADDC = ADDI->getAPIntValue();
   3964         APInt SRLC = SRLI->getAPIntValue();
   3965         if (ADDC.getMinSignedBits() <= 64 &&
   3966             SRLC.ult(VT.getSizeInBits()) &&
   3967             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
   3968           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
   3969                                              SRLC.getZExtValue());
   3970           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
   3971             ADDC |= Mask;
   3972             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
   3973               SDLoc DL0(N0);
   3974               SDValue NewAdd =
   3975                 DAG.getNode(ISD::ADD, DL0, VT,
   3976                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
   3977               CombineTo(N0.getNode(), NewAdd);
   3978               // Return N so it doesn't get rechecked!
   3979               return SDValue(N, 0);
   3980             }
   3981           }
   3982         }
   3983       }
   3984     }
   3985   }
   3986 
   3987   // Reduce bit extract of low half of an integer to the narrower type.
   3988   // (and (srl i64:x, K), KMask) ->
   3989   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
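          // E.g. for i64 -> i32 (illustrative):
          //   (and (srl i64:x, 8), 0xff)
          //     --> (zext (and (srl (trunc i64:x to i32), 8), 0xff) to i64)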
   3990   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
   3991     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
   3992       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
   3993         unsigned Size = VT.getSizeInBits();
   3994         const APInt &AndMask = CAnd->getAPIntValue();
   3995         unsigned ShiftBits = CShift->getZExtValue();
   3996 
   3997         // Bail out, this node will probably disappear anyway.
   3998         if (ShiftBits == 0)
   3999           return SDValue();
   4000 
   4001         unsigned MaskBits = AndMask.countTrailingOnes();
   4002         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
   4003 
   4004         if (AndMask.isMask() &&
   4005             // Required bits must not span the two halves of the integer and
   4006             // must fit in the half size type.
   4007             (ShiftBits + MaskBits <= Size / 2) &&
   4008             TLI.isNarrowingProfitable(VT, HalfVT) &&
   4009             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
   4010             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
   4011             TLI.isTruncateFree(VT, HalfVT) &&
   4012             TLI.isZExtFree(HalfVT, VT)) {
   4013           // The isNarrowingProfitable is to avoid regressions on PPC and
   4014           // AArch64 which match a few 64-bit bit insert / bit extract patterns
   4015           // on downstream users of this. Those patterns could probably be
   4016           // extended to handle extensions mixed in.
   4017 
   4018           SDValue SL(N0); // Usable as an SDLoc via implicit conversion.
   4019           assert(MaskBits <= Size);
   4020 
   4021           // Extracting the highest bit of the low half.
   4022           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
   4023           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
   4024                                       N0.getOperand(0));
   4025 
   4026           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
   4027           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
   4028           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
   4029           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
   4030           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
   4031         }
   4032       }
   4033     }
   4034   }
   4035 
   4036   return SDValue();
   4037 }
   4038 
   4039 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
   4040                                    EVT LoadResultTy, EVT &ExtVT) {
   4041   if (!AndC->getAPIntValue().isMask())
   4042     return false;
   4043 
   4044   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
   4045 
   4046   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
   4047   EVT LoadedVT = LoadN->getMemoryVT();
   4048 
   4049   if (ExtVT == LoadedVT &&
   4050       (!LegalOperations ||
   4051        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
   4052     // ZEXTLOAD will match without needing to change the size of the value being
   4053     // loaded.
   4054     return true;
   4055   }
   4056 
   4057   // Do not change the width of a volatile load.
   4058   if (LoadN->isVolatile())
   4059     return false;
   4060 
   4061   // Do not generate loads of non-round integer types since these can
   4062   // be expensive (and would be wrong if the type is not byte sized).
   4063   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
   4064     return false;
   4065 
   4066   if (LegalOperations &&
   4067       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
   4068     return false;
   4069 
   4070   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
   4071     return false;
   4072 
   4073   return true;
   4074 }
   4075 
   4076 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
   4077                                     ISD::LoadExtType ExtType, EVT &MemVT,
   4078                                     unsigned ShAmt) {
   4079   if (!LDST)
   4080     return false;
   4081   // Only allow byte offsets.
   4082   if (ShAmt % 8)
   4083     return false;
   4084 
   4085   // Do not generate loads of non-round integer types since these can
   4086   // be expensive (and would be wrong if the type is not byte sized).
   4087   if (!MemVT.isRound())
   4088     return false;
   4089 
   4090   // Don't change the width of a volatile load.
   4091   if (LDST->isVolatile())
   4092     return false;
   4093 
   4094   // Verify that we are actually reducing a load width here.
   4095   if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
   4096     return false;
   4097 
   4098   // Ensure that this isn't going to produce an unsupported unaligned access.
   4099   if (ShAmt &&
   4100       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
   4101                               LDST->getAddressSpace(), ShAmt / 8))
   4102     return false;
   4103 
   4104   // It's not possible to generate a constant of extended or untyped type.
   4105   EVT PtrType = LDST->getBasePtr().getValueType();
   4106   if (PtrType == MVT::Untyped || PtrType.isExtended())
   4107     return false;
   4108 
   4109   if (isa<LoadSDNode>(LDST)) {
   4110     LoadSDNode *Load = cast<LoadSDNode>(LDST);
   4111     // Don't transform one with multiple uses, this would require adding a new
   4112     // load.
   4113     if (!SDValue(Load, 0).hasOneUse())
   4114       return false;
   4115 
   4116     if (LegalOperations &&
   4117         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
   4118       return false;
   4119 
   4120     // For the transform to be legal, the load must produce only two values
   4121     // (the value loaded and the chain).  Don't transform a pre-increment
   4122     // load, for example, which produces an extra value.  Otherwise the
   4123     // transformation is not equivalent, and the downstream logic to replace
   4124     // uses gets things wrong.
   4125     if (Load->getNumValues() > 2)
   4126       return false;
   4127 
   4128     // If the load that we're shrinking is an extload and we're not just
   4129     // discarding the extension, we can't simply shrink the load. Bail.
   4130     // TODO: It would be possible to merge the extensions in some cases.
   4131     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
   4132         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
   4133       return false;
   4134 
   4135     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
   4136       return false;
   4137   } else {
   4138     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
   4139     StoreSDNode *Store = cast<StoreSDNode>(LDST);
   4140     // Can't write outside the original store
   4141     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
   4142       return false;
   4143 
   4144     if (LegalOperations &&
   4145         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
   4146       return false;
   4147   }
   4148   return true;
   4149 }
   4150 
   4151 bool DAGCombiner::SearchForAndLoads(SDNode *N,
   4152                                     SmallPtrSetImpl<LoadSDNode*> &Loads,
   4153                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
   4154                                     ConstantSDNode *Mask,
   4155                                     SDNode *&NodeToMask) {
   4156   // Recursively search for the operands, looking for loads which can be
   4157   // narrowed.
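          // E.g. for (and (or (load a), (zext (load b))), 0xFF), both loads
          // can be narrowed to i8, after which the mask is redundant (sketch;
          // the legality checks below still apply).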
   4158   for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
   4159     SDValue Op = N->getOperand(i);
   4160 
   4161     if (Op.getValueType().isVector())
   4162       return false;
   4163 
   4164     // Some constants may need fixing up later if they are too large.
   4165     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
   4166       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
   4167           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
   4168         NodesWithConsts.insert(N);
   4169       continue;
   4170     }
   4171 
   4172     if (!Op.hasOneUse())
   4173       return false;
   4174 
   4175     switch(Op.getOpcode()) {
   4176     case ISD::LOAD: {
   4177       auto *Load = cast<LoadSDNode>(Op);
   4178       EVT ExtVT;
   4179       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
   4180           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
   4181 
   4182         // ZEXTLOAD is already small enough.
   4183         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
   4184             ExtVT.bitsGE(Load->getMemoryVT()))
   4185           continue;
   4186 
   4187         // Use bitsLE to also convert equal-sized loads to zext.
   4188         if (ExtVT.bitsLE(Load->getMemoryVT()))
   4189           Loads.insert(Load);
   4190 
   4191         continue;
   4192       }
   4193       return false;
   4194     }
   4195     case ISD::ZERO_EXTEND:
   4196     case ISD::AssertZext: {
   4197       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
   4198       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
   4199       EVT VT = Op.getOpcode() == ISD::AssertZext ?
   4200         cast<VTSDNode>(Op.getOperand(1))->getVT() :
   4201         Op.getOperand(0).getValueType();
   4202 
   4203       // We can accept extending nodes if the mask is wider than, or equal
   4204       // in width to, the original type.
   4205       if (ExtVT.bitsGE(VT))
   4206         continue;
   4207       break;
   4208     }
   4209     case ISD::OR:
   4210     case ISD::XOR:
   4211     case ISD::AND:
   4212       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
   4213                              NodeToMask))
   4214         return false;
   4215       continue;
   4216     }
   4217 
   4218     // Allow one node which will be masked along with any loads found.
   4219     if (NodeToMask)
   4220       return false;
   4221 
   4222     // Also ensure that the node to be masked only produces one data result.
   4223     NodeToMask = Op.getNode();
   4224     if (NodeToMask->getNumValues() > 1) {
   4225       bool HasValue = false;
   4226       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
   4227         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
   4228         if (VT != MVT::Glue && VT != MVT::Other) {
   4229           if (HasValue) {
   4230             NodeToMask = nullptr;
   4231             return false;
   4232           }
   4233           HasValue = true;
   4234         }
   4235       }
   4236       assert(HasValue && "Node to be masked has no data result?");
   4237     }
   4238   }
   4239   return true;
   4240 }
   4241 
   4242 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
   4243   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
   4244   if (!Mask)
   4245     return false;
   4246 
   4247   if (!Mask->getAPIntValue().isMask())
   4248     return false;
   4249 
   4250   // No need to do anything if the AND directly uses a load.
   4251   if (isa<LoadSDNode>(N->getOperand(0)))
   4252     return false;
   4253 
   4254   SmallPtrSet<LoadSDNode*, 8> Loads;
   4255   SmallPtrSet<SDNode*, 2> NodesWithConsts;
   4256   SDNode *FixupNode = nullptr;
   4257   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
   4258     if (Loads.size() == 0)
   4259       return false;
   4260 
   4261     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
   4262     SDValue MaskOp = N->getOperand(1);
   4263 
   4264     // If it exists, fixup the single node we allow in the tree that needs
   4265     // masking.
   4266     if (FixupNode) {
   4267       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
   4268       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
   4269                                 FixupNode->getValueType(0),
   4270                                 SDValue(FixupNode, 0), MaskOp);
   4271       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
   4272       if (And.getOpcode() == ISD::AND)
   4273         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
   4274     }
   4275 
   4276     // Narrow any constants that need it.
   4277     for (auto *LogicN : NodesWithConsts) {
   4278       SDValue Op0 = LogicN->getOperand(0);
   4279       SDValue Op1 = LogicN->getOperand(1);
   4280 
   4281       if (isa<ConstantSDNode>(Op0))
   4282         std::swap(Op0, Op1);
   4283 
   4284       SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
   4285                                 Op1, MaskOp);
   4286 
   4287       DAG.UpdateNodeOperands(LogicN, Op0, And);
   4288     }
   4289 
   4290     // Create narrow loads.
   4291     for (auto *Load : Loads) {
   4292       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
   4293       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
   4294                                 SDValue(Load, 0), MaskOp);
   4295       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
   4296       if (And.getOpcode() == ISD::AND)
   4297         And = SDValue(
   4298             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
   4299       SDValue NewLoad = ReduceLoadWidth(And.getNode());
   4300       assert(NewLoad &&
   4301              "Shouldn't be masking the load if it can't be narrowed");
   4302       CombineTo(Load, NewLoad, NewLoad.getValue(1));
   4303     }
   4304     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
   4305     return true;
   4306   }
   4307   return false;
   4308 }
   4309 
   4310 // Unfold
   4311 //    x &  (-1 'logical shift' y)
   4312 // To
   4313 //    (x 'opposite logical shift' y) 'logical shift' y
   4314 // if it is better for performance.
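        // For example (shifts are logical):
        //   x & (-1 << y)  -->  (x >> y) << y   (clears the low y bits)
        //   x & (-1 >> y)  -->  (x << y) >> y   (clears the high y bits)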
   4315 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
   4316   assert(N->getOpcode() == ISD::AND);
   4317 
   4318   SDValue N0 = N->getOperand(0);
   4319   SDValue N1 = N->getOperand(1);
   4320 
   4321   // Do we actually prefer shifts over mask?
   4322   if (!TLI.preferShiftsToClearExtremeBits(N0))
   4323     return SDValue();
   4324 
   4325   // Try to match  (-1 '[outer] logical shift' y)
   4326   unsigned OuterShift;
   4327   unsigned InnerShift; // The opposite direction to the OuterShift.
   4328   SDValue Y;           // Shift amount.
   4329   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
   4330     if (!M.hasOneUse())
   4331       return false;
   4332     OuterShift = M->getOpcode();
   4333     if (OuterShift == ISD::SHL)
   4334       InnerShift = ISD::SRL;
   4335     else if (OuterShift == ISD::SRL)
   4336       InnerShift = ISD::SHL;
   4337     else
   4338       return false;
   4339     if (!isAllOnesConstant(M->getOperand(0)))
   4340       return false;
   4341     Y = M->getOperand(1);
   4342     return true;
   4343   };
   4344 
   4345   SDValue X;
   4346   if (matchMask(N1))
   4347     X = N0;
   4348   else if (matchMask(N0))
   4349     X = N1;
   4350   else
   4351     return SDValue();
   4352 
   4353   SDLoc DL(N);
   4354   EVT VT = N->getValueType(0);
   4355 
   4356   //     tmp = x   'opposite logical shift' y
   4357   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
   4358   //     ret = tmp 'logical shift' y
   4359   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
   4360 
   4361   return T1;
   4362 }
   4363 
   4364 SDValue DAGCombiner::visitAND(SDNode *N) {
   4365   SDValue N0 = N->getOperand(0);
   4366   SDValue N1 = N->getOperand(1);
   4367   EVT VT = N1.getValueType();
   4368 
   4369   // x & x --> x
   4370   if (N0 == N1)
   4371     return N0;
   4372 
   4373   // fold vector ops
   4374   if (VT.isVector()) {
   4375     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   4376       return FoldedVOp;
   4377 
   4378     // fold (and x, 0) -> 0, vector edition
   4379     if (ISD::isBuildVectorAllZeros(N0.getNode()))
   4380       // Do not return N0, because an undef node may exist in N0.
   4381       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
   4382                              SDLoc(N), N0.getValueType());
   4383     if (ISD::isBuildVectorAllZeros(N1.getNode()))
   4384       // Do not return N1, because an undef node may exist in N1.
   4385       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
   4386                              SDLoc(N), N1.getValueType());
   4387 
   4388     // fold (and x, -1) -> x, vector edition
   4389     if (ISD::isBuildVectorAllOnes(N0.getNode()))
   4390       return N1;
   4391     if (ISD::isBuildVectorAllOnes(N1.getNode()))
   4392       return N0;
   4393   }
   4394 
   4395   // fold (and c1, c2) -> c1&c2
   4396   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   4397   ConstantSDNode *N1C = isConstOrConstSplat(N1);
   4398   if (N0C && N1C && !N1C->isOpaque())
   4399     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
   4400   // canonicalize constant to RHS
   4401   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
   4402      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
   4403     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
   4404   // fold (and x, -1) -> x
   4405   if (isAllOnesConstant(N1))
   4406     return N0;
   4407   // if (and x, c) is known to be zero, return 0
   4408   unsigned BitWidth = VT.getScalarSizeInBits();
   4409   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
   4410                                    APInt::getAllOnesValue(BitWidth)))
   4411     return DAG.getConstant(0, SDLoc(N), VT);
   4412 
   4413   if (SDValue NewSel = foldBinOpIntoSelect(N))
   4414     return NewSel;
   4415 
   4416   // reassociate and
   4417   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
   4418     return RAND;
   4419 
   4420   // Try to convert a constant mask AND into a shuffle clear mask.
   4421   if (VT.isVector())
   4422     if (SDValue Shuffle = XformToShuffleWithZero(N))
   4423       return Shuffle;
   4424 
   4425   // fold (and (or x, C), D) -> D if (C & D) == D
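          // E.g. (and (or x, 0xFF), 0x0F) --> 0x0F, since 0x0F & 0xFF == 0x0F.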
   4426   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
   4427     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
   4428   };
   4429   if (N0.getOpcode() == ISD::OR &&
   4430       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
   4431     return N1;
   4432   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
   4433   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
   4434     SDValue N0Op0 = N0.getOperand(0);
   4435     APInt Mask = ~N1C->getAPIntValue();
   4436     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
   4437     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
   4438       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
   4439                                  N0.getValueType(), N0Op0);
   4440 
   4441       // Replace uses of the AND with uses of the Zero extend node.
   4442       CombineTo(N, Zext);
   4443 
   4444       // We actually want to replace all uses of the any_extend with the
   4445       // zero_extend, to avoid duplicating things.  This will later cause this
   4446       // AND to be folded.
   4447       CombineTo(N0.getNode(), Zext);
   4448       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   4449     }
   4450   }
   4451   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
   4452   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
   4453   // already be zero by virtue of the width of the base type of the load.
   4454   //
   4455   // the 'X' node here can either be nothing or an extract_vector_elt to catch
   4456   // more cases.
   4457   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
   4458        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
   4459        N0.getOperand(0).getOpcode() == ISD::LOAD &&
   4460        N0.getOperand(0).getResNo() == 0) ||
   4461       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
   4462     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
   4463                                          N0 : N0.getOperand(0) );
   4464 
   4465     // Get the constant (if applicable) that the zeroth operand is ANDed with.
   4466     // This can be a pure constant or a vector splat, in which case we treat the
   4467     // vector as a scalar and use the splat value.
   4468     APInt Constant = APInt::getNullValue(1);
   4469     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
   4470       Constant = C->getAPIntValue();
   4471     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
   4472       APInt SplatValue, SplatUndef;
   4473       unsigned SplatBitSize;
   4474       bool HasAnyUndefs;
   4475       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
   4476                                              SplatBitSize, HasAnyUndefs);
   4477       if (IsSplat) {
   4478         // Undef bits can contribute to a possible optimisation if set, so
   4479         // set them.
   4480         SplatValue |= SplatUndef;
   4481 
   4482         // The splat value may be something like "0x00FFFFFF", which means 0 for
   4483         // the first vector value and FF for the rest, repeating. We need a mask
   4484         // that will apply equally to all members of the vector, so AND all the
   4485         // lanes of the constant together.
   4486         EVT VT = Vector->getValueType(0);
   4487         unsigned BitWidth = VT.getScalarSizeInBits();
   4488 
   4489         // If the splat value has been compressed to a bitlength lower
   4490         // than the size of the vector lane, we need to re-expand it to
   4491         // the lane size.
   4492         if (BitWidth > SplatBitSize)
   4493           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
   4494                SplatBitSize < BitWidth;
   4495                SplatBitSize = SplatBitSize * 2)
   4496             SplatValue |= SplatValue.shl(SplatBitSize);
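                // E.g. an 8-bit splat value 0xAB in a 32-bit lane expands to
                // 0xABABABAB (illustrative).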
   4497 
   4498         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
   4499         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
   4500         if (SplatBitSize % BitWidth == 0) {
   4501           Constant = APInt::getAllOnesValue(BitWidth);
   4502           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
   4503             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
   4504         }
   4505       }
   4506     }
   4507 
   4508     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
   4509     // actually legal and isn't going to get expanded, else this is a false
   4510     // optimisation.
   4511     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
   4512                                                     Load->getValueType(0),
   4513                                                     Load->getMemoryVT());
   4514 
   4515     // Resize the constant to the same size as the original memory access before
   4516     // extension. If it is still the AllOnesValue then this AND is completely
   4517     // unneeded.
   4518     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
   4519 
   4520     bool B;
   4521     switch (Load->getExtensionType()) {
   4522     default: B = false; break;
   4523     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
   4524     case ISD::ZEXTLOAD:
   4525     case ISD::NON_EXTLOAD: B = true; break;
   4526     }
   4527 
   4528     if (B && Constant.isAllOnesValue()) {
   4529       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
   4530       // preserve semantics once we get rid of the AND.
   4531       SDValue NewLoad(Load, 0);
   4532 
   4533       // Fold the AND away. NewLoad may get replaced immediately.
   4534       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
   4535 
   4536       if (Load->getExtensionType() == ISD::EXTLOAD) {
   4537         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
   4538                               Load->getValueType(0), SDLoc(Load),
   4539                               Load->getChain(), Load->getBasePtr(),
   4540                               Load->getOffset(), Load->getMemoryVT(),
   4541                               Load->getMemOperand());
   4542         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
   4543         if (Load->getNumValues() == 3) {
   4544           // PRE/POST_INC loads have 3 values.
   4545           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
   4546                            NewLoad.getValue(2) };
   4547           CombineTo(Load, To, 3, true);
   4548         } else {
   4549           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
   4550         }
   4551       }
   4552 
   4553       return SDValue(N, 0); // Return N so it doesn't get rechecked!
   4554     }
   4555   }
   4556 
   4557   // fold (and (load x), 255) -> (zextload x, i8)
   4558   // fold (and (extload x, i16), 255) -> (zextload x, i8)
   4559   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
   4560   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
   4561                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
   4562                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
   4563     if (SDValue Res = ReduceLoadWidth(N)) {
   4564       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
   4565         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
   4566 
   4567       AddToWorklist(N);
   4568       CombineTo(LN0, Res, Res.getValue(1));
   4569       return SDValue(N, 0);
   4570     }
   4571   }
   4572 
   4573   if (Level >= AfterLegalizeTypes) {
   4574     // Attempt to propagate the AND back up to the leaves which, if they're
   4575     // loads, can be combined to narrow loads and the AND node can be removed.
   4576     // Perform after legalization so that extend nodes will already be
   4577     // combined into the loads.
   4578     if (BackwardsPropagateMask(N, DAG)) {
   4579       return SDValue(N, 0);
   4580     }
   4581   }
   4582 
   4583   if (SDValue Combined = visitANDLike(N0, N1, N))
   4584     return Combined;
   4585 
   4586   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
   4587   if (N0.getOpcode() == N1.getOpcode())
   4588     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
   4589       return Tmp;
   4590 
   4591   // Masking the negated extension of a boolean is just the zero-extended
   4592   // boolean:
   4593   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
   4594   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
   4595   //
   4596   // Note: the SimplifyDemandedBits fold below can make an information-losing
   4597   // transform, and then we have no way to find this better fold.
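          // Why this holds: (sub 0, zext(bool)) is 0 or -1, and
          // (sub 0, sext(bool)) is 0 or 1; masking either with 1 yields 0 or
          // 1, i.e. zext(bool X).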
   4598   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
   4599     if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) {
   4600       SDValue SubRHS = N0.getOperand(1);
   4601       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
   4602           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
   4603         return SubRHS;
   4604       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
   4605           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
   4606         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
   4607     }
   4608   }
   4609 
   4610   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
   4611   // fold (and (sra)) -> (and (srl)) when possible.
   4612   if (SimplifyDemandedBits(SDValue(N, 0)))
   4613     return SDValue(N, 0);
   4614 
   4615   // fold (zext_inreg (extload x)) -> (zextload x)
   4616   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
   4617     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   4618     EVT MemVT = LN0->getMemoryVT();
   4619     // If we zero all the possible extended bits, then we can turn this into
   4620     // a zextload if we are running before legalize or the operation is legal.
   4621     unsigned BitWidth = N1.getScalarValueSizeInBits();
   4622     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
   4623                            BitWidth - MemVT.getScalarSizeInBits())) &&
   4624         ((!LegalOperations && !LN0->isVolatile()) ||
   4625          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
   4626       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
   4627                                        LN0->getChain(), LN0->getBasePtr(),
   4628                                        MemVT, LN0->getMemOperand());
   4629       AddToWorklist(N);
   4630       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
   4631       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   4632     }
   4633   }
   4634   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
   4635   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
   4636       N0.hasOneUse()) {
   4637     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   4638     EVT MemVT = LN0->getMemoryVT();
   4639     // If we zero all the possible extended bits, then we can turn this into
   4640     // a zextload if we are running before legalize or the operation is legal.
   4641     unsigned BitWidth = N1.getScalarValueSizeInBits();
   4642     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
   4643                            BitWidth - MemVT.getScalarSizeInBits())) &&
   4644         ((!LegalOperations && !LN0->isVolatile()) ||
   4645          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
   4646       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
   4647                                        LN0->getChain(), LN0->getBasePtr(),
   4648                                        MemVT, LN0->getMemOperand());
   4649       AddToWorklist(N);
   4650       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
   4651       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   4652     }
   4653   }
   4654   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
   4655   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
   4656     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
   4657                                            N0.getOperand(1), false))
   4658       return BSwap;
   4659   }
   4660 
   4661   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
   4662     return Shifts;
   4663 
   4664   return SDValue();
   4665 }
   4666 
   4667 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
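        /// E.g. for i32 a = 0x00003344 (illustrative):
        ///   ((a & 0xff) << 8) | ((a >> 8) & 0xff) == 0x4433, and
        ///   (bswap a) >> 16 == 0x44330000 >> 16 == 0x4433 as well.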
   4668 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
   4669                                         bool DemandHighBits) {
   4670   if (!LegalOperations)
   4671     return SDValue();
   4672 
   4673   EVT VT = N->getValueType(0);
   4674   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
   4675     return SDValue();
   4676   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
   4677     return SDValue();
   4678 
   4679   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
   4680   bool LookPassAnd0 = false;
   4681   bool LookPassAnd1 = false;
   4682   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
   4683       std::swap(N0, N1);
   4684   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
   4685       std::swap(N0, N1);
   4686   if (N0.getOpcode() == ISD::AND) {
   4687     if (!N0.getNode()->hasOneUse())
   4688       return SDValue();
   4689     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   4690     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
   4691     // This is needed for X86.
   4692     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
   4693                   N01C->getZExtValue() != 0xFFFF))
   4694       return SDValue();
   4695     N0 = N0.getOperand(0);
   4696     LookPassAnd0 = true;
   4697   }
   4698 
   4699   if (N1.getOpcode() == ISD::AND) {
   4700     if (!N1.getNode()->hasOneUse())
   4701       return SDValue();
   4702     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
   4703     if (!N11C || N11C->getZExtValue() != 0xFF)
   4704       return SDValue();
   4705     N1 = N1.getOperand(0);
   4706     LookPassAnd1 = true;
   4707   }
   4708 
   4709   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
   4710     std::swap(N0, N1);
   4711   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
   4712     return SDValue();
   4713   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
   4714     return SDValue();
   4715 
   4716   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   4717   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
   4718   if (!N01C || !N11C)
   4719     return SDValue();
   4720   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
   4721     return SDValue();
   4722 
   4723   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
   4724   SDValue N00 = N0->getOperand(0);
   4725   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
   4726     if (!N00.getNode()->hasOneUse())
   4727       return SDValue();
   4728     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
   4729     if (!N001C || N001C->getZExtValue() != 0xFF)
   4730       return SDValue();
   4731     N00 = N00.getOperand(0);
   4732     LookPassAnd0 = true;
   4733   }
   4734 
   4735   SDValue N10 = N1->getOperand(0);
   4736   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
   4737     if (!N10.getNode()->hasOneUse())
   4738       return SDValue();
   4739     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
   4740     // Also allow 0xFFFF since the bits will be shifted out. This is needed
   4741     // for X86.
   4742     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
   4743                    N101C->getZExtValue() != 0xFFFF))
   4744       return SDValue();
   4745     N10 = N10.getOperand(0);
   4746     LookPassAnd1 = true;
   4747   }
   4748 
   4749   if (N00 != N10)
   4750     return SDValue();
   4751 
   4752   // Make sure everything beyond the low halfword gets set to zero since the
   4753   // SRL by 16 will clear the top bits.
   4754   unsigned OpSizeInBits = VT.getSizeInBits();
   4755   if (DemandHighBits && OpSizeInBits > 16) {
   4756     // If the left-shift isn't masked out then the only way this is a bswap is
   4757     // if all bits beyond the low 8 are 0. In that case the entire pattern
   4758     // reduces to a left shift anyway: leave it for other parts of the combiner.
   4759     if (!LookPassAnd0)
   4760       return SDValue();
   4761 
   4762     // However, if the right shift isn't masked out then it might be because
   4763     // it's not needed. See if we can spot that too.
   4764     if (!LookPassAnd1 &&
   4765         !DAG.MaskedValueIsZero(
   4766             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
   4767       return SDValue();
   4768   }
   4769 
   4770   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
   4771   if (OpSizeInBits > 16) {
   4772     SDLoc DL(N);
   4773     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
   4774                       DAG.getConstant(OpSizeInBits - 16, DL,
   4775                                       getShiftAmountTy(VT)));
   4776   }
   4777   return Res;
   4778 }
   4779 
   4780 /// Return true if the specified node is an element that makes up a 32-bit
   4781 /// packed halfword byteswap.
   4782 /// ((x & 0x000000ff) << 8) |
   4783 /// ((x & 0x0000ff00) >> 8) |
   4784 /// ((x & 0x00ff0000) << 8) |
   4785 /// ((x & 0xff000000) >> 8)
   4786 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
   4787   if (!N.getNode()->hasOneUse())
   4788     return false;
   4789 
   4790   unsigned Opc = N.getOpcode();
   4791   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
   4792     return false;
   4793 
   4794   SDValue N0 = N.getOperand(0);
   4795   unsigned Opc0 = N0.getOpcode();
   4796   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
   4797     return false;
   4798 
   4799   ConstantSDNode *N1C = nullptr;
   4800   // SHL or SRL: look upstream for AND mask operand
   4801   if (Opc == ISD::AND)
   4802     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
   4803   else if (Opc0 == ISD::AND)
   4804     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   4805   if (!N1C)
   4806     return false;
   4807 
   4808   unsigned MaskByteOffset;
   4809   switch (N1C->getZExtValue()) {
   4810   default:
   4811     return false;
   4812   case 0xFF:       MaskByteOffset = 0; break;
   4813   case 0xFF00:     MaskByteOffset = 1; break;
   4814   case 0xFFFF:
   4815     // In case demanded bits didn't clear the bits that will be shifted out.
   4816     // This is needed for X86.
   4817     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
   4818       MaskByteOffset = 1;
   4819       break;
   4820     }
   4821     return false;
   4822   case 0xFF0000:   MaskByteOffset = 2; break;
   4823   case 0xFF000000: MaskByteOffset = 3; break;
   4824   }
   4825 
   4826   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
   4827   if (Opc == ISD::AND) {
   4828     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
   4829       // (x >> 8) & 0xff
   4830       // (x >> 8) & 0xff0000
   4831       if (Opc0 != ISD::SRL)
   4832         return false;
   4833       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   4834       if (!C || C->getZExtValue() != 8)
   4835         return false;
   4836     } else {
   4837       // (x << 8) & 0xff00
   4838       // (x << 8) & 0xff000000
   4839       if (Opc0 != ISD::SHL)
   4840         return false;
   4841       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   4842       if (!C || C->getZExtValue() != 8)
   4843         return false;
   4844     }
   4845   } else if (Opc == ISD::SHL) {
   4846     // (x & 0xff) << 8
   4847     // (x & 0xff0000) << 8
   4848     if (MaskByteOffset != 0 && MaskByteOffset != 2)
   4849       return false;
   4850     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
   4851     if (!C || C->getZExtValue() != 8)
   4852       return false;
   4853   } else { // Opc == ISD::SRL
   4854     // (x & 0xff00) >> 8
   4855     // (x & 0xff000000) >> 8
   4856     if (MaskByteOffset != 1 && MaskByteOffset != 3)
   4857       return false;
   4858     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
   4859     if (!C || C->getZExtValue() != 8)
   4860       return false;
   4861   }
   4862 
   4863   if (Parts[MaskByteOffset])
   4864     return false;
   4865 
   4866   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
   4867   return true;
   4868 }
   4869 
   4870 /// Match a 32-bit packed halfword bswap. That is
   4871 /// ((x & 0x000000ff) << 8) |
   4872 /// ((x & 0x0000ff00) >> 8) |
   4873 /// ((x & 0x00ff0000) << 8) |
   4874 /// ((x & 0xff000000) >> 8)
   4875 /// => (rotl (bswap x), 16)
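        /// E.g. for x = 0xAABBCCDD (illustrative): the OR pattern yields
        /// 0xBBAADDCC, and rotl(bswap(x), 16) = rotl(0xDDCCBBAA, 16) gives
        /// 0xBBAADDCC as well.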
   4876 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
   4877   if (!LegalOperations)
   4878     return SDValue();
   4879 
   4880   EVT VT = N->getValueType(0);
   4881   if (VT != MVT::i32)
   4882     return SDValue();
   4883   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
   4884     return SDValue();
   4885 
   4886   // Look for either
   4887   // (or (or (and), (and)), (or (and), (and)))
   4888   // (or (or (or (and), (and)), (and)), (and))
   4889   if (N0.getOpcode() != ISD::OR)
   4890     return SDValue();
   4891   SDValue N00 = N0.getOperand(0);
   4892   SDValue N01 = N0.getOperand(1);
   4893   SDNode *Parts[4] = {};
   4894 
   4895   if (N1.getOpcode() == ISD::OR &&
   4896       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
   4897     // (or (or (and), (and)), (or (and), (and)))
   4898     if (!isBSwapHWordElement(N00, Parts))
   4899       return SDValue();
   4900 
   4901     if (!isBSwapHWordElement(N01, Parts))
   4902       return SDValue();
   4903     SDValue N10 = N1.getOperand(0);
   4904     if (!isBSwapHWordElement(N10, Parts))
   4905       return SDValue();
   4906     SDValue N11 = N1.getOperand(1);
   4907     if (!isBSwapHWordElement(N11, Parts))
   4908       return SDValue();
   4909   } else {
   4910     // (or (or (or (and), (and)), (and)), (and))
   4911     if (!isBSwapHWordElement(N1, Parts))
   4912       return SDValue();
   4913     if (!isBSwapHWordElement(N01, Parts))
   4914       return SDValue();
   4915     if (N00.getOpcode() != ISD::OR)
   4916       return SDValue();
   4917     SDValue N000 = N00.getOperand(0);
   4918     if (!isBSwapHWordElement(N000, Parts))
   4919       return SDValue();
   4920     SDValue N001 = N00.getOperand(1);
   4921     if (!isBSwapHWordElement(N001, Parts))
   4922       return SDValue();
   4923   }
   4924 
   4925   // Make sure the parts are all coming from the same node.
   4926   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
   4927     return SDValue();
   4928 
   4929   SDLoc DL(N);
   4930   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
   4931                               SDValue(Parts[0], 0));
   4932 
   4933   // The result of the bswap should be rotated by 16. If a rotate is not
   4934   // legal, emit (x << 16) | (x >> 16) instead.
   4935   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
   4936   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
   4937     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
   4938   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
   4939     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
   4940   return DAG.getNode(ISD::OR, DL, VT,
   4941                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
   4942                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
   4943 }
   4944 
   4945 /// This contains all DAGCombine rules which reduce two values combined by
   4946 /// an Or operation to a single value \see visitANDLike().
   4947 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
   4948   EVT VT = N1.getValueType();
   4949   SDLoc DL(N);
   4950 
   4951   // fold (or x, undef) -> -1
   4952   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
   4953     return DAG.getAllOnesConstant(DL, VT);
   4954 
   4955   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
   4956     return V;
   4957 
   4958   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
   4959   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
   4960       // Don't increase # computations.
   4961       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
   4962     // We can only do this xform if we know that bits from X that are set in C2
   4963     // but not in C1 are already zero.  Likewise for Y.
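            // e.g. with C1 = 0xFFFFFF00 and C2 = 0x000000FF, when the low byte
            // of X and the high three bytes of Y are known zero:
            //   (or (and X, 0xFFFFFF00), (and Y, 0x000000FF))
            //     -> (and (or X, Y), 0xFFFFFFFF)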
   4964     if (const ConstantSDNode *N0O1C =
   4965         getAsNonOpaqueConstant(N0.getOperand(1))) {
   4966       if (const ConstantSDNode *N1O1C =
   4967           getAsNonOpaqueConstant(N1.getOperand(1))) {
   4970         const APInt &LHSMask = N0O1C->getAPIntValue();
   4971         const APInt &RHSMask = N1O1C->getAPIntValue();
   4972 
   4973         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
   4974             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
   4975           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
   4976                                   N0.getOperand(0), N1.getOperand(0));
   4977           return DAG.getNode(ISD::AND, DL, VT, X,
   4978                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
   4979         }
   4980       }
   4981     }
   4982   }
   4983 
   4984   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
   4985   if (N0.getOpcode() == ISD::AND &&
   4986       N1.getOpcode() == ISD::AND &&
   4987       N0.getOperand(0) == N1.getOperand(0) &&
   4988       // Don't increase # computations.
   4989       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
   4990     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
   4991                             N0.getOperand(1), N1.getOperand(1));
   4992     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
   4993   }
   4994 
   4995   return SDValue();
   4996 }
   4997 
   4998 SDValue DAGCombiner::visitOR(SDNode *N) {
   4999   SDValue N0 = N->getOperand(0);
   5000   SDValue N1 = N->getOperand(1);
   5001   EVT VT = N1.getValueType();
   5002 
   5003   // x | x --> x
   5004   if (N0 == N1)
   5005     return N0;
   5006 
   5007   // fold vector ops
   5008   if (VT.isVector()) {
   5009     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   5010       return FoldedVOp;
   5011 
   5012     // fold (or x, 0) -> x, vector edition
   5013     if (ISD::isBuildVectorAllZeros(N0.getNode()))
   5014       return N1;
   5015     if (ISD::isBuildVectorAllZeros(N1.getNode()))
   5016       return N0;
   5017 
   5018     // fold (or x, -1) -> -1, vector edition
   5019     if (ISD::isBuildVectorAllOnes(N0.getNode()))
   5020       // do not return N0, because an undef node may exist in N0
   5021       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
   5022     if (ISD::isBuildVectorAllOnes(N1.getNode()))
   5023       // do not return N1, because an undef node may exist in N1
   5024       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
   5025 
   5026     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
   5027     // Do this only if the resulting shuffle is legal.
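            // e.g. with 4 elements, where V_0 is the all-zeros vector:
            //   (or (shuf A, V_0, <0,4,2,4>), (shuf B, V_0, <4,1,4,3>))
            //     -> (shuf A, B, <0,5,2,7>)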
   5028     if (isa<ShuffleVectorSDNode>(N0) &&
   5029         isa<ShuffleVectorSDNode>(N1) &&
   5030         // Avoid folding a node with illegal type.
   5031         TLI.isTypeLegal(VT)) {
   5032       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
   5033       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
   5034       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
   5035       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
   5036       // Ensure both shuffles have a zero input.
   5037       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
   5038         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
   5039         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
   5040         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
   5041         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
   5042         bool CanFold = true;
   5043         int NumElts = VT.getVectorNumElements();
   5044         SmallVector<int, 4> Mask(NumElts);
   5045 
   5046         for (int i = 0; i != NumElts; ++i) {
   5047           int M0 = SV0->getMaskElt(i);
   5048           int M1 = SV1->getMaskElt(i);
   5049 
   5050           // Determine if either index is pointing to a zero vector.
   5051           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
   5052           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
   5053 
   5054           // If one element is zero and the other side is undef, keep undef.
   5055           // This also handles the case that both are undef.
   5056           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
   5057             Mask[i] = -1;
   5058             continue;
   5059           }
   5060 
   5061           // Make sure only one of the elements is zero.
   5062           if (M0Zero == M1Zero) {
   5063             CanFold = false;
   5064             break;
   5065           }
   5066 
   5067           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
   5068 
   5069           // We have a zero and non-zero element. If the non-zero came from
   5070           // SV0 make the index a LHS index. If it came from SV1, make it
   5071           // a RHS index. We need to mod by NumElts because we don't care
   5072           // which operand it came from in the original shuffles.
   5073           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
   5074         }
   5075 
   5076         if (CanFold) {
   5077           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
   5078           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
   5079 
   5080           bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
   5081           if (!LegalMask) {
   5082             std::swap(NewLHS, NewRHS);
   5083             ShuffleVectorSDNode::commuteMask(Mask);
   5084             LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
   5085           }
   5086 
   5087           if (LegalMask)
   5088             return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
   5089         }
   5090       }
   5091     }
   5092   }
   5093 
   5094   // fold (or c1, c2) -> c1|c2
   5095   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   5096   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   5097   if (N0C && N1C && !N1C->isOpaque())
   5098     return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
   5099   // canonicalize constant to RHS
   5100   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
   5101      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
   5102     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
   5103   // fold (or x, 0) -> x
   5104   if (isNullConstant(N1))
   5105     return N0;
   5106   // fold (or x, -1) -> -1
   5107   if (isAllOnesConstant(N1))
   5108     return N1;
   5109 
   5110   if (SDValue NewSel = foldBinOpIntoSelect(N))
   5111     return NewSel;
   5112 
   5113   // fold (or x, c) -> c iff (x & ~c) == 0
   5114   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
   5115     return N1;
   5116 
   5117   if (SDValue Combined = visitORLike(N0, N1, N))
   5118     return Combined;
   5119 
   5120   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
   5121   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
   5122     return BSwap;
   5123   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
   5124     return BSwap;
   5125 
   5126   // reassociate or
   5127   if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
   5128     return ROR;
   5129 
   5130   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
   5131   // iff (c1 & c2) != 0.
   5132   auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
   5133     return LHS->getAPIntValue().intersects(RHS->getAPIntValue());
   5134   };
   5135   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
   5136       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
   5137     if (SDValue COR = DAG.FoldConstantArithmetic(
   5138             ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
   5139       SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
   5140       AddToWorklist(IOR.getNode());
   5141       return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
   5142     }
   5143   }
   5144 
   5145   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
   5146   if (N0.getOpcode() == N1.getOpcode())
   5147     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
   5148       return Tmp;
   5149 
   5150   // See if this is some rotate idiom.
   5151   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
   5152     return SDValue(Rot, 0);
   5153 
   5154   if (SDValue Load = MatchLoadCombine(N))
   5155     return Load;
   5156 
   5157   // Simplify the operands using demanded-bits information.
   5158   if (SimplifyDemandedBits(SDValue(N, 0)))
   5159     return SDValue(N, 0);
   5160 
   5161   return SDValue();
   5162 }
   5163 
   5164 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
   5165   if (Op.getOpcode() == ISD::AND &&
   5166       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
   5167     Mask = Op.getOperand(1);
   5168     return Op.getOperand(0);
   5169   }
   5170   return Op;
   5171 }
   5172 
   5173 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
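        /// e.g. this matches both (srl x, 8) and (and (srl x, 8), 0xff); in both
        /// cases Shift receives the srl, and in the latter Mask receives 0xff.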
   5174 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
   5175                             SDValue &Mask) {
   5176   Op = stripConstantMask(DAG, Op, Mask);
   5177   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
   5178     Shift = Op;
   5179     return true;
   5180   }
   5181   return false;
   5182 }
   5183 
   5184 /// Helper function for visitOR to extract the needed side of a rotate idiom
   5185 /// from a shl/srl/mul/udiv.  This is meant to handle cases where
   5186 /// InstCombine merged some outside op with one of the shifts from
   5187 /// the rotate pattern.
   5188 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
   5189 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
   5190 /// patterns:
   5191 ///
   5192 ///   (or (mul v c0) (srl (mul v c1) c2)):
   5193 ///     expands (mul v c0) -> (shl (mul v c1) c3)
   5194 ///
   5195 ///   (or (udiv v c0) (shl (udiv v c1) c2)):
   5196 ///     expands (udiv v c0) -> (srl (udiv v c1) c3)
   5197 ///
   5198 ///   (or (shl v c0) (srl (shl v c1) c2)):
   5199 ///     expands (shl v c0) -> (shl (shl v c1) c3)
   5200 ///
   5201 ///   (or (srl v c0) (shl (srl v c1) c2)):
   5202 ///     expands (srl v c0) -> (srl (srl v c1) c3)
   5203 ///
   5204 /// Such that in all cases, c3+c2==bitwidth(op v c1).
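        ///
        /// e.g. for i32 (illustrative constants):
        ///   (or (mul v 768) (srl (mul v 3) 24))
        /// expands (mul v 768) -> (shl (mul v 3) 8), since 768 == 3 << 8 and
        /// 8 + 24 == 32, which exposes a rotate-by-8 of (mul v 3).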
   5205 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
   5206                                      SDValue ExtractFrom, SDValue &Mask,
   5207                                      const SDLoc &DL) {
   5208   assert(OppShift && ExtractFrom && "Empty SDValue");
   5209   assert(
   5210       (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
   5211       "Existing shift must be valid as a rotate half");
   5212 
   5213   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
   5214   // Preconditions:
   5215   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
   5216   //
   5217   // Find opcode of the needed shift to be extracted from (op0 v c0).
   5218   unsigned Opcode = ISD::DELETED_NODE;
   5219   bool IsMulOrDiv = false;
   5220   // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
   5221   // opcode or its arithmetic (mul or udiv) variant.
   5222   auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
   5223     IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
   5224     if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
   5225       return false;
   5226     Opcode = NeededShift;
   5227     return true;
   5228   };
   5229   // op0 must be either the needed shift opcode or the mul/udiv equivalent
   5230   // that the needed shift can be extracted from.
   5231   if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
   5232       (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
   5233     return SDValue();
   5234 
   5235   // op0 must be the same opcode on both sides, have the same LHS argument,
   5236   // and produce the same value type.
   5237   SDValue OppShiftLHS = OppShift.getOperand(0);
   5238   EVT ShiftedVT = OppShiftLHS.getValueType();
   5239   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
   5240       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
   5241       ShiftedVT != ExtractFrom.getValueType())
   5242     return SDValue();
   5243 
   5244   // Amount of the existing shift.
   5245   ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
   5246   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
   5247   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
   5248   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
   5249   ConstantSDNode *ExtractFromCst =
   5250       isConstOrConstSplat(ExtractFrom.getOperand(1));
   5251   // TODO: We should be able to handle non-uniform constant vectors for these values.
   5252   // Check that we have constant values.
   5253   if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
   5254       !OppLHSCst || !OppLHSCst->getAPIntValue() ||
   5255       !ExtractFromCst || !ExtractFromCst->getAPIntValue())
   5256     return SDValue();
   5257 
   5258   // Compute the shift amount we need to extract to complete the rotate.
   5259   const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
   5260   APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
   5261   if (NeededShiftAmt.isNegative())
   5262     return SDValue();
   5263   // Normalize the bitwidth of the two mul/udiv/shift constant operands.
   5264   APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
   5265   APInt OppLHSAmt = OppLHSCst->getAPIntValue();
   5266   zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
   5267 
   5268   // Now try to extract the needed shift from the ExtractFrom op and see if the
   5269   // result matches up with the existing shift's LHS op.
   5270   if (IsMulOrDiv) {
   5271     // Op to extract from is a mul or udiv by a constant.
   5272     // Check:
   5273     //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
   5274     //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
   5275     const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
   5276                                                  NeededShiftAmt.getZExtValue());
   5277     APInt ResultAmt;
   5278     APInt Rem;
   5279     APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
   5280     if (Rem != 0 || ResultAmt != OppLHSAmt)
   5281       return SDValue();
   5282   } else {
   5283     // Op to extract from is a shift by a constant.
   5284     // Check:
   5285     //      c2 - (bitwidth(op0 v c0) - c1) == c0
   5286     if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
   5287                                           ExtractFromAmt.getBitWidth()))
   5288       return SDValue();
   5289   }
   5290 
   5291   // Return the expanded shift op that should allow a rotate to be formed.
   5292   EVT ShiftVT = OppShift.getOperand(1).getValueType();
   5293   EVT ResVT = ExtractFrom.getValueType();
   5294   SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
   5295   return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
   5296 }
   5297 
   5298 // Return true if we can prove that, whenever Neg and Pos are both in the
   5299 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
   5300 // for two opposing shifts shift1 and shift2 and a value X with EltSize bits:
   5301 //
   5302 //     (or (shift1 X, Neg), (shift2 X, Pos))
   5303 //
   5304 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
   5305 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
   5306 // to consider shift amounts with defined behavior.
   5307 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
   5308                            SelectionDAG &DAG) {
   5309   // If EltSize is a power of 2 then:
   5310   //
   5311   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
   5312   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
   5313   //
   5314   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
   5315   // for the stronger condition:
   5316   //
   5317   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
   5318   //
   5319   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
   5320   // we can just replace Neg with Neg' for the rest of the function.
   5321   //
   5322   // In other cases we check for the even stronger condition:
   5323   //
   5324   //     Neg == EltSize - Pos                                    [B]
   5325   //
   5326   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
   5327   // behavior if Pos == 0 (and consequently Neg == EltSize).
   5328   //
   5329   // We could actually use [A] whenever EltSize is a power of 2, but the
   5330   // only extra cases that it would match are those uninteresting ones
   5331   // where Neg and Pos are never in range at the same time.  E.g. for
   5332   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
   5333   // as well as (sub 32, Pos), but:
   5334   //
   5335   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
   5336   //
   5337   // always invokes undefined behavior for 32-bit X.
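          //
          // e.g. for i32, Pos == y with Neg == (sub 32, y) satisfies [B], while
          // Pos == (and y, 31) with Neg == (and (sub 32, y), 31) satisfies [A].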
   5338   //
   5339   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
   5340   unsigned MaskLoBits = 0;
   5341   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
   5342     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
   5343       KnownBits Known;
   5344       DAG.computeKnownBits(Neg.getOperand(0), Known);
   5345       unsigned Bits = Log2_64(EltSize);
   5346       if (NegC->getAPIntValue().getActiveBits() <= Bits &&
   5347           ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
   5348         Neg = Neg.getOperand(0);
   5349         MaskLoBits = Bits;
   5350       }
   5351     }
   5352   }
   5353 
   5354   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
   5355   if (Neg.getOpcode() != ISD::SUB)
   5356     return false;
   5357   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
   5358   if (!NegC)
   5359     return false;
   5360   SDValue NegOp1 = Neg.getOperand(1);
   5361 
   5362   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
   5363   // Pos'.  The truncation is redundant for the purpose of the equality.
   5364   if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
   5365     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
   5366       KnownBits Known;
   5367       DAG.computeKnownBits(Pos.getOperand(0), Known);
   5368       if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
   5369           ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
   5370            MaskLoBits))
   5371         Pos = Pos.getOperand(0);
   5372     }
   5373   }
   5374 
   5375   // The condition we need is now:
   5376   //
   5377   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
   5378   //
   5379   // If NegOp1 == Pos then we need:
   5380   //
   5381   //              EltSize & Mask == NegC & Mask
   5382   //
   5383   // (because "x & Mask" is a truncation and distributes through subtraction).
   5384   APInt Width;
   5385   if (Pos == NegOp1)
   5386     Width = NegC->getAPIntValue();
   5387 
   5388   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
   5389   // Then the condition we want to prove becomes:
   5390   //
   5391   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
   5392   //
   5393   // which, again because "x & Mask" is a truncation, becomes:
   5394   //
   5395   //                NegC & Mask == (EltSize - PosC) & Mask
   5396   //             EltSize & Mask == (NegC + PosC) & Mask
   5397   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
   5398     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
   5399       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
   5400     else
   5401       return false;
   5402   } else
   5403     return false;
   5404 
   5405   // Now we just need to check that EltSize & Mask == Width & Mask.
   5406   if (MaskLoBits)
   5407     // EltSize & Mask is 0 since Mask is EltSize - 1.
   5408     return Width.getLoBits(MaskLoBits) == 0;
   5409   return Width == EltSize;
   5410 }
   5411 
   5412 // A subroutine of MatchRotate used once we have found an OR of two opposite
   5413 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
   5414 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
   5415 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
   5416 // Neg with outer conversions stripped away.
   5417 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
   5418                                        SDValue Neg, SDValue InnerPos,
   5419                                        SDValue InnerNeg, unsigned PosOpcode,
   5420                                        unsigned NegOpcode, const SDLoc &DL) {
   5421   // fold (or (shl x, (*ext y)),
   5422   //          (srl x, (*ext (sub 32, y)))) ->
   5423   //   (rotl x, y) or (rotr x, (sub 32, y))
   5424   //
   5425   // fold (or (shl x, (*ext (sub 32, y))),
   5426   //          (srl x, (*ext y))) ->
   5427   //   (rotr x, y) or (rotl x, (sub 32, y))
   5428   EVT VT = Shifted.getValueType();
   5429   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
   5430     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
   5431     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
   5432                        HasPos ? Pos : Neg).getNode();
   5433   }
   5434 
   5435   return nullptr;
   5436 }
   5437 
   5438 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
   5439 // idioms for rotate, and if the target supports rotation instructions, generate
   5440 // a rot[lr].
   5441 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
   5442   // Must be a legal type. Expanded and promoted types won't work with rotates.
   5443   EVT VT = LHS.getValueType();
   5444   if (!TLI.isTypeLegal(VT)) return nullptr;
   5445 
   5446   // The target must have at least one rotate flavor.
   5447   bool HasROTL = hasOperation(ISD::ROTL, VT);
   5448   bool HasROTR = hasOperation(ISD::ROTR, VT);
   5449   if (!HasROTL && !HasROTR) return nullptr;
   5450 
   5451   // Check for truncated rotate.
   5452   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
   5453       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
   5454     assert(LHS.getValueType() == RHS.getValueType());
   5455     if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
   5456       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
   5457                          SDValue(Rot, 0)).getNode();
   5458     }
   5459   }
   5460 
   5461   // Match "(X shl/srl V1) & V2" where V2 may not be present.
   5462   SDValue LHSShift;   // The shift.
   5463   SDValue LHSMask;    // AND value if any.
   5464   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
   5465 
   5466   SDValue RHSShift;   // The shift.
   5467   SDValue RHSMask;    // AND value if any.
   5468   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
   5469 
   5470   // If neither side matched a rotate half, bail
   5471   if (!LHSShift && !RHSShift)
   5472     return nullptr;
   5473 
   5474   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
   5475   // side of the rotate, so try to handle that here. In all cases we need to
   5476   // pass the matched shift from the opposite side to compute the opcode and
   5477   // needed shift amount to extract.  We still want to do this if both sides
   5478   // matched a rotate half because one half may be a potential overshift that
   5479   // can be broken down (i.e. if InstCombine merged two shl or srl ops into a
   5480   // single one).
   5481 
   5482   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
   5483   if (LHSShift)
   5484     if (SDValue NewRHSShift =
   5485             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
   5486       RHSShift = NewRHSShift;
   5487   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
   5488   if (RHSShift)
   5489     if (SDValue NewLHSShift =
   5490             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
   5491       LHSShift = NewLHSShift;
   5492 
   5493   // If a side is still missing, nothing else we can do.
   5494   if (!RHSShift || !LHSShift)
   5495     return nullptr;
   5496 
   5497   // At this point we've matched or extracted a shift op on each side.
   5498 
   5499   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
   5500     return nullptr;   // Not shifting the same value.
   5501 
   5502   if (LHSShift.getOpcode() == RHSShift.getOpcode())
   5503     return nullptr;   // Shifts must disagree.
   5504 
   5505   // Canonicalize shl to left side in a shl/srl pair.
   5506   if (RHSShift.getOpcode() == ISD::SHL) {
   5507     std::swap(LHS, RHS);
   5508     std::swap(LHSShift, RHSShift);
   5509     std::swap(LHSMask, RHSMask);
   5510   }
   5511 
   5512   unsigned EltSizeInBits = VT.getScalarSizeInBits();
   5513   SDValue LHSShiftArg = LHSShift.getOperand(0);
   5514   SDValue LHSShiftAmt = LHSShift.getOperand(1);
   5515   SDValue RHSShiftArg = RHSShift.getOperand(0);
   5516   SDValue RHSShiftAmt = RHSShift.getOperand(1);
   5517 
   5518   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
   5519   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
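          // e.g. for i32: (or (shl x, 8), (srl x, 24)) -> (rotl x, 8), since
          // 8 + 24 == 32; (rotr x, 24) is used instead if only ROTR is legal.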
   5520   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
   5521                                         ConstantSDNode *RHS) {
   5522     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
   5523   };
   5524   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
   5525     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
   5526                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
   5527 
   5528     // If there is an AND of either shifted operand, apply it to the result.
   5529     if (LHSMask.getNode() || RHSMask.getNode()) {
   5530       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
   5531       SDValue Mask = AllOnes;
   5532 
   5533       if (LHSMask.getNode()) {
   5534         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
   5535         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
   5536                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
   5537       }
   5538       if (RHSMask.getNode()) {
   5539         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
   5540         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
   5541                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
   5542       }
   5543 
   5544       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
   5545     }
   5546 
   5547     return Rot.getNode();
   5548   }
   5549 
   5550   // If there is a mask here, and we have a variable shift, we can't be sure
   5551   // that we're masking out the right stuff.
   5552   if (LHSMask.getNode() || RHSMask.getNode())
   5553     return nullptr;
   5554 
   5555   // If the shift amount is sign/zext/any-extended just peel it off.
   5556   SDValue LExtOp0 = LHSShiftAmt;
   5557   SDValue RExtOp0 = RHSShiftAmt;
   5558   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
   5559        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
   5560        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
   5561        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
   5562       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
   5563        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
   5564        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
   5565        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
   5566     LExtOp0 = LHSShiftAmt.getOperand(0);
   5567     RExtOp0 = RHSShiftAmt.getOperand(0);
   5568   }
   5569 
   5570   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
   5571                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
   5572   if (TryL)
   5573     return TryL;
   5574 
   5575   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
   5576                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
   5577   if (TryR)
   5578     return TryR;
   5579 
   5580   return nullptr;
   5581 }
   5582 
   5583 namespace {
   5584 
   5585 /// Represents the known origin of an individual byte in a load combine
   5586 /// pattern. The value of the byte is either constant zero or comes from memory.
   5587 struct ByteProvider {
   5588   // For constant zero providers Load is set to nullptr. For memory providers
   5589   // Load represents the node which loads the byte from memory.
   5590   // ByteOffset is the offset of the byte in the value produced by the load.
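          // e.g. byte 2 of the value produced by an i32 load is described by
          // { Load = that load node, ByteOffset = 2 }.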
   5591   LoadSDNode *Load = nullptr;
   5592   unsigned ByteOffset = 0;
   5593 
   5594   ByteProvider() = default;
   5595 
   5596   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
   5597     return ByteProvider(Load, ByteOffset);
   5598   }
   5599 
   5600   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
   5601 
   5602   bool isConstantZero() const { return !Load; }
   5603   bool isMemory() const { return Load; }
   5604 
   5605   bool operator==(const ByteProvider &Other) const {
   5606     return Other.Load == Load && Other.ByteOffset == ByteOffset;
   5607   }
   5608 
   5609 private:
   5610   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
   5611       : Load(Load), ByteOffset(ByteOffset) {}
   5612 };
   5613 
   5614 } // end anonymous namespace
   5615 
   5616 /// Recursively traverses the expression, calculating the origin of the requested
   5617 /// byte of the given value. Returns None if the provider can't be calculated.
   5618 ///
   5619 /// For every value other than the root of the expression, verifies that the
   5620 /// value has exactly one use; if not, returns None. This way, if the origin of
   5621 /// the byte is returned, it is guaranteed that the values which contribute to
   5622 /// the byte are not used outside of this expression.
   5623 ///
   5624 /// Because the parts of the expression are not allowed to have more than one
   5625 /// use this function iterates over trees, not DAGs. So it never visits the same
   5626 /// node more than once.
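        ///
        /// e.g. for (or (zext (load i8, p)), (shl (zext (load i8, p+1)), 8)),
        /// byte 0 is provided by the load of p and byte 1 by the load of p+1;
        /// any higher bytes are constant zero thanks to the zero-extensions.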
   5627 static const Optional<ByteProvider>
   5628 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
   5629                       bool Root = false) {
   5630   // A typical i64-by-i8 pattern requires recursion up to a depth of 8 calls.
   5631   if (Depth == 10)
   5632     return None;
   5633 
   5634   if (!Root && !Op.hasOneUse())
   5635     return None;
   5636 
   5637   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
   5638   unsigned BitWidth = Op.getValueSizeInBits();
   5639   if (BitWidth % 8 != 0)
   5640     return None;
   5641   unsigned ByteWidth = BitWidth / 8;
   5642   assert(Index < ByteWidth && "invalid index requested");
   5643   (void) ByteWidth;
   5644 
   5645   switch (Op.getOpcode()) {
   5646   case ISD::OR: {
   5647     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
   5648     if (!LHS)
   5649       return None;
   5650     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
   5651     if (!RHS)
   5652       return None;
   5653 
   5654     if (LHS->isConstantZero())
   5655       return RHS;
   5656     if (RHS->isConstantZero())
   5657       return LHS;
   5658     return None;
   5659   }
   5660   case ISD::SHL: {
   5661     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
   5662     if (!ShiftOp)
   5663       return None;
   5664 
   5665     uint64_t BitShift = ShiftOp->getZExtValue();
   5666     if (BitShift % 8 != 0)
   5667       return None;
   5668     uint64_t ByteShift = BitShift / 8;
   5669 
   5670     return Index < ByteShift
   5671                ? ByteProvider::getConstantZero()
   5672                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
   5673                                        Depth + 1);
   5674   }
   5675   case ISD::ANY_EXTEND:
   5676   case ISD::SIGN_EXTEND:
   5677   case ISD::ZERO_EXTEND: {
   5678     SDValue NarrowOp = Op->getOperand(0);
   5679     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
   5680     if (NarrowBitWidth % 8 != 0)
   5681       return None;
   5682     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
   5683 
   5684     if (Index >= NarrowByteWidth)
   5685       return Op.getOpcode() == ISD::ZERO_EXTEND
   5686                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
   5687                  : None;
   5688     return calculateByteProvider(NarrowOp, Index, Depth + 1);
   5689   }
   5690   case ISD::BSWAP:
   5691     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
   5692                                  Depth + 1);
   5693   case ISD::LOAD: {
   5694     auto L = cast<LoadSDNode>(Op.getNode());
   5695     if (L->isVolatile() || L->isIndexed())
   5696       return None;
   5697 
   5698     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
   5699     if (NarrowBitWidth % 8 != 0)
   5700       return None;
   5701     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
   5702 
   5703     if (Index >= NarrowByteWidth)
   5704       return L->getExtensionType() == ISD::ZEXTLOAD
   5705                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
   5706                  : None;
   5707     return ByteProvider::getMemory(L, Index);
   5708   }
   5709   }
   5710 
   5711   return None;
   5712 }
   5713 
   5714 /// Match a pattern where a wide type scalar value is loaded by several narrow
   5715 /// loads and combined by shifts and ors. Fold it into a single load or a load
   5716 /// and a BSWAP if the target supports it.
   5717 ///
   5718 /// Assuming little endian target:
   5719 ///  i8 *a = ...
   5720 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
   5721 /// =>
   5722 ///  i32 val = *((i32)a)
   5723 ///
   5724 ///  i8 *a = ...
   5725 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
   5726 /// =>
   5727 ///  i32 val = BSWAP(*((i32)a))
   5728 ///
   5729 /// TODO: This rule matches complex patterns with OR node roots and doesn't
   5730 /// interact well with the worklist mechanism. When a part of the pattern is
   5731 /// updated (e.g. one of the loads) its direct users are put into the worklist,
   5732 /// but the root node of the pattern which triggers the load combine is not
   5733 /// necessarily a direct user of the changed node. For example, once the address
   5734 /// of t28 load is reassociated load combine won't be triggered:
   5735 ///             t25: i32 = add t4, Constant:i32<2>
   5736 ///           t26: i64 = sign_extend t25
   5737 ///        t27: i64 = add t2, t26
   5738 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
   5739 ///     t29: i32 = zero_extend t28
   5740 ///   t32: i32 = shl t29, Constant:i8<8>
   5741 /// t33: i32 = or t23, t32
   5742 /// As a possible fix visitLoad can check if the load can be a part of a load
   5743 /// combine pattern and add corresponding OR roots to the worklist.
   5744 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
   5745   assert(N->getOpcode() == ISD::OR &&
   5746          "Can only match load combining against OR nodes");
   5747 
   5748   // Handles simple types only
   5749   EVT VT = N->getValueType(0);
   5750   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
   5751     return SDValue();
   5752   unsigned ByteWidth = VT.getSizeInBits() / 8;
   5753 
   5754   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   5755   // Before legalization we can introduce overly wide illegal loads which will
   5756   // later be split into legal-sized loads. This enables us to combine i64-by-i8
   5757   // load patterns into a couple of i32 loads on 32-bit targets.
   5758   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
   5759     return SDValue();
   5760 
   5761   std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
   5762     unsigned BW, unsigned i) { return i; };
   5763   std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
   5764     unsigned BW, unsigned i) { return BW - i - 1; };
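          // e.g. for a 4-byte value, byte i of the value lives at memory byte i
          // on a little endian target and at memory byte 3 - i on a big endian
          // target.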
   5765 
   5766   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
   5767   auto MemoryByteOffset = [&] (ByteProvider P) {
   5768     assert(P.isMemory() && "Must be a memory byte provider");
   5769     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
   5770     assert(LoadBitWidth % 8 == 0 &&
   5771            "can only analyze providers for individual bytes not bit");
   5772     unsigned LoadByteWidth = LoadBitWidth / 8;
   5773     return IsBigEndianTarget
   5774             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
   5775             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
   5776   };
   5777 
   5778   Optional<BaseIndexOffset> Base;
   5779   SDValue Chain;
   5780 
   5781   SmallPtrSet<LoadSDNode *, 8> Loads;
   5782   Optional<ByteProvider> FirstByteProvider;
   5783   int64_t FirstOffset = INT64_MAX;
   5784 
   5785   // Check if all the bytes of the OR we are looking at are loaded from the same
   5786   // base address. Collect byte offsets from the Base address in ByteOffsets.
   5787   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
   5788   for (unsigned i = 0; i < ByteWidth; i++) {
   5789     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
   5790     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
   5791       return SDValue();
   5792 
   5793     LoadSDNode *L = P->Load;
   5794     assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
   5795            "Must be enforced by calculateByteProvider");
   5796     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
   5797 
   5798     // All loads must share the same chain
   5799     SDValue LChain = L->getChain();
   5800     if (!Chain)
   5801       Chain = LChain;
   5802     else if (Chain != LChain)
   5803       return SDValue();
   5804 
   5805     // Loads must share the same base address
   5806     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
   5807     int64_t ByteOffsetFromBase = 0;
   5808     if (!Base)
   5809       Base = Ptr;
   5810     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
   5811       return SDValue();
   5812 
   5813     // Calculate the offset of the current byte from the base address
   5814     ByteOffsetFromBase += MemoryByteOffset(*P);
   5815     ByteOffsets[i] = ByteOffsetFromBase;
   5816 
   5817     // Remember the first byte load
   5818     if (ByteOffsetFromBase < FirstOffset) {
   5819       FirstByteProvider = P;
   5820       FirstOffset = ByteOffsetFromBase;
   5821     }
   5822 
   5823     Loads.insert(L);
   5824   }
   5825   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
   5826          "memory, so there must be at least one load which produces the value");
   5827   assert(Base && "Base address of the accessed memory location must be set");
   5828   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
   5829 
   5830   // Check if the bytes of the OR we are looking at match either a big or a
   5831   // little endian value load.
   5832   bool BigEndian = true, LittleEndian = true;
   5833   for (unsigned i = 0; i < ByteWidth; i++) {
   5834     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
   5835     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
   5836     BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
   5837     if (!BigEndian && !LittleEndian)
   5838       return SDValue();
   5839   }
   5840   assert((BigEndian != LittleEndian) && "should be either big or little endian");
   5841   assert(FirstByteProvider && "must be set");
   5842 
   5843   // Ensure that the first byte is loaded from offset zero of the first load,
   5844   // so that the combined value can be loaded from the first load's address.
   5845   if (MemoryByteOffset(*FirstByteProvider) != 0)
   5846     return SDValue();
   5847   LoadSDNode *FirstLoad = FirstByteProvider->Load;
   5848 
   5849   // The node we are looking at matches the pattern; check if we can
   5850   // replace it with a single load and bswap if needed.
   5851 
   5852   // If the load needs a byte swap, check if the target supports it.
   5853   bool NeedsBswap = IsBigEndianTarget != BigEndian;
   5854 
   5855   // Before legalization we can introduce illegal bswaps which will later be
   5856   // converted to an explicit bswap sequence. This way we end up with a single
   5857   // load and byte shuffling instead of several loads and byte shuffling.
   5858   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
   5859     return SDValue();
   5860 
   5861   // Check that a load of the wide type is both allowed and fast on the target
   5862   bool Fast = false;
   5863   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
   5864                                         VT, FirstLoad->getAddressSpace(),
   5865                                         FirstLoad->getAlignment(), &Fast);
   5866   if (!Allowed || !Fast)
   5867     return SDValue();
   5868 
   5869   SDValue NewLoad =
   5870       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
   5871                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
   5872 
   5873   // Transfer chain users from old loads to the new load.
   5874   for (LoadSDNode *L : Loads)
   5875     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
   5876 
   5877   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
   5878 }
   5879 
   5880 // If the target has andn, bsl, or a similar bit-select instruction,
   5881 // we want to unfold the masked merge, whose canonical pattern is:
   5882 //   |        A  |  |B|
   5883 //   ((x ^ y) & m) ^ y
   5884 //    |  D  |
   5885 // Into:
   5886 //   (x & m) | (y & ~m)
   5887 // If y is a constant, and the 'andn' does not work with immediates,
   5888 // we unfold into a different pattern:
   5889 //   ~(~x & m) & (m | y)
   5890 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
   5891 //       the very least that breaks andnpd / andnps patterns, and because those
   5892 //       patterns are simplified in IR and shouldn't be created in the DAG.
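        //
        // Illustrative check with 4-bit values x = 0b1010, y = 0b0100, m = 0b1100:
        //   ((x ^ y) & m) ^ y  ==  (0b1110 & 0b1100) ^ 0b0100  ==  0b1000
        //   (x & m) | (y & ~m) ==    0b1000 | 0b0000            ==  0b1000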
   5893 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
   5894   assert(N->getOpcode() == ISD::XOR);
   5895 
   5896   // Don't touch 'not' (i.e. where y = -1).
   5897   if (isAllOnesConstantOrAllOnesSplatConstant(N->getOperand(1)))
   5898     return SDValue();
   5899 
   5900   EVT VT = N->getValueType(0);
   5901 
   5902   // There are 3 commutable operators in the pattern,
   5903   // so we have to deal with 8 possible variants of the basic pattern.
   5904   SDValue X, Y, M;
   5905   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
   5906     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
   5907       return false;
   5908     SDValue Xor = And.getOperand(XorIdx);
   5909     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
   5910       return false;
   5911     SDValue Xor0 = Xor.getOperand(0);
   5912     SDValue Xor1 = Xor.getOperand(1);
   5913     // Don't touch 'not' (i.e. where y = -1).
   5914     if (isAllOnesConstantOrAllOnesSplatConstant(Xor1))
   5915       return false;
   5916     if (Other == Xor0)
   5917       std::swap(Xor0, Xor1);
   5918     if (Other != Xor1)
   5919       return false;
   5920     X = Xor0;
   5921     Y = Xor1;
   5922     M = And.getOperand(XorIdx ? 0 : 1);
   5923     return true;
   5924   };
   5925 
   5926   SDValue N0 = N->getOperand(0);
   5927   SDValue N1 = N->getOperand(1);
   5928   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
   5929       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
   5930     return SDValue();
   5931 
   5932   // Don't do anything if the mask is constant. This should not be reachable.
   5933   // InstCombine should have already unfolded this pattern, and DAGCombiner
   5934   // probably shouldn't produce it either.
   5935   if (isa<ConstantSDNode>(M.getNode()))
   5936     return SDValue();
   5937 
   5938   // We can transform if the target has AndNot
   5939   if (!TLI.hasAndNot(M))
   5940     return SDValue();
   5941 
   5942   SDLoc DL(N);
   5943 
   5944   // If Y is a constant, check that 'andn' works with immediates.
   5945   if (!TLI.hasAndNot(Y)) {
   5946     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
   5947     // If not, we need to do a bit more work to make sure andn is still used.
   5948     SDValue NotX = DAG.getNOT(DL, X, VT);
   5949     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
   5950     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
   5951     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
   5952     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
   5953   }
   5954 
   5955   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
   5956   SDValue NotM = DAG.getNOT(DL, M, VT);
   5957   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
   5958 
   5959   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
   5960 }
   5961 
   5962 SDValue DAGCombiner::visitXOR(SDNode *N) {
   5963   SDValue N0 = N->getOperand(0);
   5964   SDValue N1 = N->getOperand(1);
   5965   EVT VT = N0.getValueType();
   5966 
   5967   // fold vector ops
   5968   if (VT.isVector()) {
   5969     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   5970       return FoldedVOp;
   5971 
   5972     // fold (xor x, 0) -> x, vector edition
   5973     if (ISD::isBuildVectorAllZeros(N0.getNode()))
   5974       return N1;
   5975     if (ISD::isBuildVectorAllZeros(N1.getNode()))
   5976       return N0;
   5977   }
   5978 
   5979   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
   5980   if (N0.isUndef() && N1.isUndef())
   5981     return DAG.getConstant(0, SDLoc(N), VT);
   5982   // fold (xor x, undef) -> undef
   5983   if (N0.isUndef())
   5984     return N0;
   5985   if (N1.isUndef())
   5986     return N1;
   5987   // fold (xor c1, c2) -> c1^c2
   5988   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   5989   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
   5990   if (N0C && N1C)
   5991     return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
   5992   // canonicalize constant to RHS
   5993   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
   5994      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
   5995     return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
   5996   // fold (xor x, 0) -> x
   5997   if (isNullConstant(N1))
   5998     return N0;
   5999 
   6000   if (SDValue NewSel = foldBinOpIntoSelect(N))
   6001     return NewSel;
   6002 
   6003   // reassociate xor
   6004   if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
   6005     return RXOR;
   6006 
   6007   // fold !(x cc y) -> (x !cc y)
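          // e.g. (xor (setcc x, y, eq), TrueVal) -> (setcc x, y, ne)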
   6008   SDValue LHS, RHS, CC;
   6009   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
   6010     bool isInt = LHS.getValueType().isInteger();
   6011     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
   6012                                                isInt);
   6013 
   6014     if (!LegalOperations ||
   6015         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
   6016       switch (N0.getOpcode()) {
   6017       default:
   6018         llvm_unreachable("Unhandled SetCC Equivalent!");
   6019       case ISD::SETCC:
   6020         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
   6021       case ISD::SELECT_CC:
   6022         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
   6023                                N0.getOperand(3), NotCC);
   6024       }
   6025     }
   6026   }
   6027 
   6028   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
   6029   if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
   6030       N0.getNode()->hasOneUse() &&
   6031       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
   6032     SDValue V = N0.getOperand(0);
   6033     SDLoc DL(N0);
   6034     V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
   6035                     DAG.getConstant(1, DL, V.getValueType()));
   6036     AddToWorklist(V.getNode());
   6037     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
   6038   }
   6039 
   6040   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
   6041   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
   6042       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
   6043     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
   6044     if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
   6045       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
   6046       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
   6047       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
   6048       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
   6049       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
   6050     }
   6051   }
   6052   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
   6053   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
   6054       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
   6055     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
   6056     if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
   6057       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
   6058       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
   6059       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
   6060       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
   6061       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
   6062     }
   6063   }
   6064   // fold (xor (and x, y), y) -> (and (not x), y)
   6065   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
   6066       N0->getOperand(1) == N1) {
   6067     SDValue X = N0->getOperand(0);
   6068     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
   6069     AddToWorklist(NotX.getNode());
   6070     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
   6071   }
   6072 
   6073   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
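          // This is the classic branchless abs idiom: Y = sra(X, size(X)-1) is 0
          // for non-negative X and all-ones for negative X, so (add X, Y) ^ Y
          // yields X unchanged or ~(X - 1) == -X, respectively.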
   6074   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
   6075     SDValue A = N0.getOpcode() == ISD::ADD ? N0 : N1;
   6076     SDValue S = N0.getOpcode() == ISD::SRA ? N0 : N1;
   6077     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
   6078       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
   6079       SDValue S0 = S.getOperand(0);
   6080       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
   6081         unsigned OpSizeInBits = VT.getScalarSizeInBits();
   6082         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
   6083           if (C->getAPIntValue() == (OpSizeInBits - 1))
   6084             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
   6085       }
   6086     }
   6087   }
   6088 
   6089   // fold (xor x, x) -> 0
   6090   if (N0 == N1)
   6091     return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
   6092 
   6093   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
   6094   // Here is a concrete example of this equivalence:
   6095   // i16   x ==  14
   6096   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
   6097   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
   6098   //
   6099   // =>
   6100   //
   6101   // i16     ~1      == 0b1111111111111110
   6102   // i16 rol(~1, 14) == 0b1011111111111111
   6103   //
   6104   // Some additional tips to help conceptualize this transform:
   6105   // - Try to see the operation as placing a single zero in a value of all ones.
   6106   // - There exists no value for x which would allow the result to contain zero.
   6107   // - Values of x larger than the bitwidth are undefined and do not require a
   6108   //   consistent result.
   6109   // - Pushing the zero left requires shifting ones in from the right.
   6110   // A rotate left of ~1 is a nice way of achieving the desired result.
   6111   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
   6112       && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
   6113     SDLoc DL(N);
   6114     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
   6115                        N0.getOperand(1));
   6116   }
   6117 
   6118   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
   6119   if (N0.getOpcode() == N1.getOpcode())
   6120     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
   6121       return Tmp;
   6122 
   6123   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
   6124   if (SDValue MM = unfoldMaskedMerge(N))
   6125     return MM;
   6126 
   6127   // Simplify the expression using non-local knowledge.
   6128   if (SimplifyDemandedBits(SDValue(N, 0)))
   6129     return SDValue(N, 0);
   6130 
   6131   return SDValue();
   6132 }
   6133 
   6134 /// Handle transforms common to the three shifts, when the shift amount is a
   6135 /// constant.
   6136 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
   6137   SDNode *LHS = N->getOperand(0).getNode();
   6138   if (!LHS->hasOneUse()) return SDValue();
   6139 
   6140   // We want to pull some binops through shifts, so that we have (and (shift))
   6141   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
   6142   // thing happens with address calculations, so it's important to canonicalize
   6143   // it.
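          // e.g. (shl (and x, 255), 2) -> (and (shl x, 2), 1020), which puts the
          // shift next to x where addressing-mode matching can see it.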
   6144   bool HighBitSet = false;  // Can we transform this if the high bit is set?
   6145 
   6146   switch (LHS->getOpcode()) {
   6147   default: return SDValue();
   6148   case ISD::OR:
   6149   case ISD::XOR:
   6150     HighBitSet = false; // We can only transform sra if the high bit is clear.
   6151     break;
   6152   case ISD::AND:
   6153     HighBitSet = true;  // We can only transform sra if the high bit is set.
   6154     break;
   6155   case ISD::ADD:
   6156     if (N->getOpcode() != ISD::SHL)
   6157       return SDValue(); // only shl(add) not sr[al](add).
   6158     HighBitSet = false; // We can only transform sra if the high bit is clear.
   6159     break;
   6160   }
   6161 
    6162   // We require the RHS of the binop to be a non-opaque constant as well.
   6163   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
   6164   if (!BinOpCst) return SDValue();
   6165 
    6166   // FIXME: disable this unless the input to the binop is a shift by a constant
    6167   // or is a copy/select. Enable this in other cases once it is known to be profitable.
   6168   SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
   6169   bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
   6170                  BinOpLHSVal->getOpcode() == ISD::SRA ||
   6171                  BinOpLHSVal->getOpcode() == ISD::SRL;
   6172   bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
   6173                         BinOpLHSVal->getOpcode() == ISD::SELECT;
   6174 
   6175   if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
   6176       !isCopyOrSelect)
   6177     return SDValue();
   6178 
   6179   if (isCopyOrSelect && N->hasOneUse())
   6180     return SDValue();
   6181 
   6182   EVT VT = N->getValueType(0);
   6183 
   6184   // If this is a signed shift right, and the high bit is modified by the
    6185   // logical operation, do not perform the transformation. The HighBitSet
   6186   // boolean indicates the value of the high bit of the constant which would
   6187   // cause it to be modified for this operation.
   6188   if (N->getOpcode() == ISD::SRA) {
   6189     bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
   6190     if (BinOpRHSSignSet != HighBitSet)
   6191       return SDValue();
   6192   }
   6193 
   6194   if (!TLI.isDesirableToCommuteWithShift(LHS))
   6195     return SDValue();
   6196 
   6197   // Fold the constants, shifting the binop RHS by the shift amount.
   6198   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
   6199                                N->getValueType(0),
   6200                                LHS->getOperand(1), N->getOperand(1));
   6201   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
   6202 
   6203   // Create the new shift.
   6204   SDValue NewShift = DAG.getNode(N->getOpcode(),
   6205                                  SDLoc(LHS->getOperand(0)),
   6206                                  VT, LHS->getOperand(0), N->getOperand(1));
   6207 
   6208   // Create the new binop.
   6209   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
   6210 }
   6211 
   6212 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
   6213   assert(N->getOpcode() == ISD::TRUNCATE);
   6214   assert(N->getOperand(0).getOpcode() == ISD::AND);
   6215 
   6216   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
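           // e.g. (truncate:i8 (and x:i32, 0x1FF)) -> (and (truncate:i8 x), 0xFF).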
   6217   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
   6218     SDValue N01 = N->getOperand(0).getOperand(1);
   6219     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
   6220       SDLoc DL(N);
   6221       EVT TruncVT = N->getValueType(0);
   6222       SDValue N00 = N->getOperand(0).getOperand(0);
   6223       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
   6224       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
   6225       AddToWorklist(Trunc00.getNode());
   6226       AddToWorklist(Trunc01.getNode());
   6227       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
   6228     }
   6229   }
   6230 
   6231   return SDValue();
   6232 }
   6233 
   6234 SDValue DAGCombiner::visitRotate(SDNode *N) {
   6235   SDLoc dl(N);
   6236   SDValue N0 = N->getOperand(0);
   6237   SDValue N1 = N->getOperand(1);
   6238   EVT VT = N->getValueType(0);
   6239   unsigned Bitsize = VT.getScalarSizeInBits();
   6240 
   6241   // fold (rot x, 0) -> x
   6242   if (isNullConstantOrNullSplatConstant(N1))
   6243     return N0;
   6244 
   6245   // fold (rot x, c) -> (rot x, c % BitSize)
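           // e.g. an i8 rotate by 11 is the same as a rotate by 11 % 8 == 3.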
   6246   if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
   6247     if (Cst->getAPIntValue().uge(Bitsize)) {
   6248       uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
   6249       return DAG.getNode(N->getOpcode(), dl, VT, N0,
   6250                          DAG.getConstant(RotAmt, dl, N1.getValueType()));
   6251     }
   6252   }
   6253 
   6254   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
   6255   if (N1.getOpcode() == ISD::TRUNCATE &&
   6256       N1.getOperand(0).getOpcode() == ISD::AND) {
   6257     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
   6258       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
   6259   }
   6260 
   6261   unsigned NextOp = N0.getOpcode();
    6262   // fold (rot* (rot* x, c2), c1) -> (rot* x, (c1 +- c2) % bitsize)
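           // e.g. on i8, (rotl (rotl x, 3), 5) -> (rotl x, (5 + 3) % 8) -> x, while
           // (rotl (rotr x, 3), 5) -> (rotl x, (5 - 3) % 8) -> (rotl x, 2).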
   6263   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
   6264     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
   6265     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
   6266     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
   6267       EVT ShiftVT = C1->getValueType(0);
   6268       bool SameSide = (N->getOpcode() == NextOp);
   6269       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
   6270       if (SDValue CombinedShift =
   6271               DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
   6272         SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
   6273         SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
   6274             ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
   6275             BitsizeC.getNode());
   6276         return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
   6277                            CombinedShiftNorm);
   6278       }
   6279     }
   6280   }
   6281   return SDValue();
   6282 }
   6283 
   6284 SDValue DAGCombiner::visitSHL(SDNode *N) {
   6285   SDValue N0 = N->getOperand(0);
   6286   SDValue N1 = N->getOperand(1);
   6287   EVT VT = N0.getValueType();
   6288   unsigned OpSizeInBits = VT.getScalarSizeInBits();
   6289 
   6290   // fold vector ops
   6291   if (VT.isVector()) {
   6292     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   6293       return FoldedVOp;
   6294 
   6295     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    6296     // If setcc produces an all-ones true value, then:
   6297     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
   6298     if (N1CV && N1CV->isConstant()) {
   6299       if (N0.getOpcode() == ISD::AND) {
   6300         SDValue N00 = N0->getOperand(0);
   6301         SDValue N01 = N0->getOperand(1);
   6302         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
   6303 
   6304         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
   6305             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
   6306                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
   6307           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
   6308                                                      N01CV, N1CV))
   6309             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
   6310         }
   6311       }
   6312     }
   6313   }
   6314 
   6315   ConstantSDNode *N1C = isConstOrConstSplat(N1);
   6316 
   6317   // fold (shl c1, c2) -> c1<<c2
   6318   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   6319   if (N0C && N1C && !N1C->isOpaque())
   6320     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
   6321   // fold (shl 0, x) -> 0
   6322   if (isNullConstantOrNullSplatConstant(N0))
   6323     return N0;
   6324   // fold (shl x, c >= size(x)) -> undef
    6325   // NOTE: ALL vector shift amounts must be out of range to avoid partial UNDEFs.
   6326   auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
   6327     return Val->getAPIntValue().uge(OpSizeInBits);
   6328   };
   6329   if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
   6330     return DAG.getUNDEF(VT);
   6331   // fold (shl x, 0) -> x
   6332   if (N1C && N1C->isNullValue())
   6333     return N0;
   6334   // fold (shl undef, x) -> 0
   6335   if (N0.isUndef())
   6336     return DAG.getConstant(0, SDLoc(N), VT);
   6337 
   6338   if (SDValue NewSel = foldBinOpIntoSelect(N))
   6339     return NewSel;
   6340 
   6341   // if (shl x, c) is known to be zero, return 0
   6342   if (DAG.MaskedValueIsZero(SDValue(N, 0),
   6343                             APInt::getAllOnesValue(OpSizeInBits)))
   6344     return DAG.getConstant(0, SDLoc(N), VT);
   6345   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
   6346   if (N1.getOpcode() == ISD::TRUNCATE &&
   6347       N1.getOperand(0).getOpcode() == ISD::AND) {
   6348     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
   6349       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
   6350   }
   6351 
   6352   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
   6353     return SDValue(N, 0);
   6354 
   6355   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
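           // e.g. on i8, (shl (shl x, 3), 4) -> (shl x, 7), while
           // (shl (shl x, 5), 4) shifts every bit out and folds to 0.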
   6356   if (N0.getOpcode() == ISD::SHL) {
   6357     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
   6358                                           ConstantSDNode *RHS) {
   6359       APInt c1 = LHS->getAPIntValue();
   6360       APInt c2 = RHS->getAPIntValue();
   6361       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
   6362       return (c1 + c2).uge(OpSizeInBits);
   6363     };
   6364     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
   6365       return DAG.getConstant(0, SDLoc(N), VT);
   6366 
   6367     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
   6368                                        ConstantSDNode *RHS) {
   6369       APInt c1 = LHS->getAPIntValue();
   6370       APInt c2 = RHS->getAPIntValue();
   6371       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
   6372       return (c1 + c2).ult(OpSizeInBits);
   6373     };
   6374     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
   6375       SDLoc DL(N);
   6376       EVT ShiftVT = N1.getValueType();
   6377       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
   6378       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
   6379     }
   6380   }
   6381 
   6382   // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
   6383   // For this to be valid, the second form must not preserve any of the bits
   6384   // that are shifted out by the inner shift in the first form.  This means
   6385   // the outer shift size must be >= the number of bits added by the ext.
   6386   // As a corollary, we don't care what kind of ext it is.
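           // e.g. (shl (zext:i32 (shl:i16 x, 4)), 20): the outer shift discards
           // all 16 bits added by the zext, so it can become
           // (shl (zext:i32 x), 24).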
   6387   if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
   6388               N0.getOpcode() == ISD::ANY_EXTEND ||
   6389               N0.getOpcode() == ISD::SIGN_EXTEND) &&
   6390       N0.getOperand(0).getOpcode() == ISD::SHL) {
   6391     SDValue N0Op0 = N0.getOperand(0);
   6392     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
   6393       APInt c1 = N0Op0C1->getAPIntValue();
   6394       APInt c2 = N1C->getAPIntValue();
   6395       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
   6396 
   6397       EVT InnerShiftVT = N0Op0.getValueType();
   6398       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
   6399       if (c2.uge(OpSizeInBits - InnerShiftSize)) {
   6400         SDLoc DL(N0);
   6401         APInt Sum = c1 + c2;
   6402         if (Sum.uge(OpSizeInBits))
   6403           return DAG.getConstant(0, DL, VT);
   6404 
   6405         return DAG.getNode(
   6406             ISD::SHL, DL, VT,
   6407             DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
   6408             DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
   6409       }
   6410     }
   6411   }
   6412 
   6413   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
   6414   // Only fold this if the inner zext has no other uses to avoid increasing
   6415   // the total number of instructions.
   6416   if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
   6417       N0.getOperand(0).getOpcode() == ISD::SRL) {
   6418     SDValue N0Op0 = N0.getOperand(0);
   6419     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
   6420       if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
   6421         uint64_t c1 = N0Op0C1->getZExtValue();
   6422         uint64_t c2 = N1C->getZExtValue();
   6423         if (c1 == c2) {
   6424           SDValue NewOp0 = N0.getOperand(0);
   6425           EVT CountVT = NewOp0.getOperand(1).getValueType();
   6426           SDLoc DL(N);
   6427           SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
   6428                                        NewOp0,
   6429                                        DAG.getConstant(c2, DL, CountVT));
   6430           AddToWorklist(NewSHL.getNode());
   6431           return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
   6432         }
   6433       }
   6434     }
   6435   }
   6436 
   6437   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
    6438   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C1-C2)) if C1  > C2
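           // e.g. (shl (srl exact x, 3), 5) -> (shl x, 2), and
           // (shl (srl exact x, 5), 3) -> (srl x, 2).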
   6439   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
   6440       N0->getFlags().hasExact()) {
   6441     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
   6442       uint64_t C1 = N0C1->getZExtValue();
   6443       uint64_t C2 = N1C->getZExtValue();
   6444       SDLoc DL(N);
   6445       if (C1 <= C2)
   6446         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
   6447                            DAG.getConstant(C2 - C1, DL, N1.getValueType()));
   6448       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
   6449                          DAG.getConstant(C1 - C2, DL, N1.getValueType()));
   6450     }
   6451   }
   6452 
    6453   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
    6454   //                               (and (srl x, (sub c1, c2)), MASK)
   6455   // Only fold this if the inner shift has no other uses -- if it does, folding
   6456   // this will increase the total number of instructions.
   6457   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
   6458     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
   6459       uint64_t c1 = N0C1->getZExtValue();
   6460       if (c1 < OpSizeInBits) {
   6461         uint64_t c2 = N1C->getZExtValue();
   6462         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
   6463         SDValue Shift;
   6464         if (c2 > c1) {
   6465           Mask <<= c2 - c1;
   6466           SDLoc DL(N);
   6467           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
   6468                               DAG.getConstant(c2 - c1, DL, N1.getValueType()));
   6469         } else {
   6470           Mask.lshrInPlace(c1 - c2);
   6471           SDLoc DL(N);
   6472           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
   6473                               DAG.getConstant(c1 - c2, DL, N1.getValueType()));
   6474         }
   6475         SDLoc DL(N0);
   6476         return DAG.getNode(ISD::AND, DL, VT, Shift,
   6477                            DAG.getConstant(Mask, DL, VT));
   6478       }
   6479     }
   6480   }
   6481 
   6482   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
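           // e.g. on i8, (shl (sra x, 4), 4) -> (and x, 0xF0): only the high four
           // bits of x survive the round trip.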
   6483   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
   6484       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
   6485     SDLoc DL(N);
   6486     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
   6487     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
   6488     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
   6489   }
   6490 
   6491   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
   6492   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
    6493   // This is a variant of the fold done on multiply, except that a mul by a
    6494   // power of 2 is turned into a shift.
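           // e.g. (shl (or x, 0x0F), 4) -> (or (shl x, 4), 0xF0).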
   6495   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
   6496       N0.getNode()->hasOneUse() &&
   6497       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
   6498       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
   6499     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
   6500     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
   6501     AddToWorklist(Shl0.getNode());
   6502     AddToWorklist(Shl1.getNode());
   6503     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
   6504   }
   6505 
   6506   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
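           // e.g. (shl (mul x, 5), 2) -> (mul x, 20).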
   6507   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
   6508       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
   6509       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
   6510     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
   6511     if (isConstantOrConstantVector(Shl))
   6512       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
   6513   }
   6514 
   6515   if (N1C && !N1C->isOpaque())
   6516     if (SDValue NewSHL = visitShiftByConstant(N, N1C))
   6517       return NewSHL;
   6518 
   6519   return SDValue();
   6520 }
   6521 
   6522 SDValue DAGCombiner::visitSRA(SDNode *N) {
   6523   SDValue N0 = N->getOperand(0);
   6524   SDValue N1 = N->getOperand(1);
   6525   EVT VT = N0.getValueType();
   6526   unsigned OpSizeInBits = VT.getScalarSizeInBits();
   6527 
   6528   // Arithmetic shifting an all-sign-bit value is a no-op.
   6529   // fold (sra 0, x) -> 0
   6530   // fold (sra -1, x) -> -1
   6531   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
   6532     return N0;
   6533 
   6534   // fold vector ops
   6535   if (VT.isVector())
   6536     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   6537       return FoldedVOp;
   6538 
   6539   ConstantSDNode *N1C = isConstOrConstSplat(N1);
   6540 
    6541   // fold (sra c1, c2) -> c1 >>s c2
   6542   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   6543   if (N0C && N1C && !N1C->isOpaque())
   6544     return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
   6545   // fold (sra x, c >= size(x)) -> undef
    6546   // NOTE: ALL vector shift amounts must be out of range to avoid partial UNDEFs.
   6547   auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
   6548     return Val->getAPIntValue().uge(OpSizeInBits);
   6549   };
   6550   if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
   6551     return DAG.getUNDEF(VT);
   6552   // fold (sra x, 0) -> x
   6553   if (N1C && N1C->isNullValue())
   6554     return N0;
   6555 
   6556   if (SDValue NewSel = foldBinOpIntoSelect(N))
   6557     return NewSel;
   6558 
    6559   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1, if the target
    6560   // supports sext_inreg.
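           // e.g. on i32, (sra (shl x, 24), 24) is a sign extension of the low 8
           // bits of x back across the full 32 bits.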
   6561   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
   6562     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
   6563     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
   6564     if (VT.isVector())
   6565       ExtVT = EVT::getVectorVT(*DAG.getContext(),
   6566                                ExtVT, VT.getVectorNumElements());
   6567     if ((!LegalOperations ||
   6568          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
   6569       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
   6570                          N0.getOperand(0), DAG.getValueType(ExtVT));
   6571   }
   6572 
   6573   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
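           // e.g. (sra (sra x, 3), 2) -> (sra x, 5); if the combined amount would
           // reach the bit width, it is clamped to OpSizeInBits - 1, which merely
           // replicates the sign bit.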
   6574   if (N0.getOpcode() == ISD::SRA) {
   6575     SDLoc DL(N);
   6576     EVT ShiftVT = N1.getValueType();
   6577 
   6578     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
   6579                                           ConstantSDNode *RHS) {
   6580       APInt c1 = LHS->getAPIntValue();
   6581       APInt c2 = RHS->getAPIntValue();
   6582       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
   6583       return (c1 + c2).uge(OpSizeInBits);
   6584     };
   6585     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
   6586       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
   6587                          DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT));
   6588 
   6589     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
   6590                                        ConstantSDNode *RHS) {
   6591       APInt c1 = LHS->getAPIntValue();
   6592       APInt c2 = RHS->getAPIntValue();
   6593       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
   6594       return (c1 + c2).ult(OpSizeInBits);
   6595     };
   6596     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
   6597       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
   6598       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);
   6599     }
   6600   }
   6601 
   6602   // fold (sra (shl X, m), (sub result_size, n))
    6603   // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
    6604   // result_size - n > m.
    6605   // If truncate is free for the target, sext(shl) is likely to result in
    6606   // better code.
   6607   if (N0.getOpcode() == ISD::SHL && N1C) {
    6608     // Get the two constants of the shifts, CN0 = m, CN = n.
   6609     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
   6610     if (N01C) {
   6611       LLVMContext &Ctx = *DAG.getContext();
   6612       // Determine what the truncate's result bitsize and type would be.
   6613       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
   6614 
   6615       if (VT.isVector())
   6616         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
   6617 
   6618       // Determine the residual right-shift amount.
   6619       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
   6620 
    6621       // If the shift is not a no-op (in which case this should be just a sign
    6622       // extend already), the destination type of the truncate is legal,
    6623       // sign_extend is legal on that type, and the truncate to that type is
    6624       // both legal and free, perform the transform.
   6625       if ((ShiftAmt > 0) &&
   6626           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
   6627           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
   6628           TLI.isTruncateFree(VT, TruncVT)) {
   6629         SDLoc DL(N);
   6630         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
   6631             getShiftAmountTy(N0.getOperand(0).getValueType()));
   6632         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
   6633                                     N0.getOperand(0), Amt);
   6634         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
   6635                                     Shift);
   6636         return DAG.getNode(ISD::SIGN_EXTEND, DL,
   6637                            N->getValueType(0), Trunc);
   6638       }
   6639     }
   6640   }
   6641 
   6642   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
   6643   if (N1.getOpcode() == ISD::TRUNCATE &&
   6644       N1.getOperand(0).getOpcode() == ISD::AND) {
   6645     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
   6646       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
   6647   }
   6648 
    6649   // fold (sra (trunc (sr[al] x, c1)), c2) -> (trunc (sra x, c1 + c2))
    6650   //      if c1 is equal to the number of bits the trunc removes
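           // e.g. with x:i32, (sra (trunc:i16 (srl x, 16)), 3)
           //      -> (trunc:i16 (sra x, 19)).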
   6651   if (N0.getOpcode() == ISD::TRUNCATE &&
   6652       (N0.getOperand(0).getOpcode() == ISD::SRL ||
   6653        N0.getOperand(0).getOpcode() == ISD::SRA) &&
   6654       N0.getOperand(0).hasOneUse() &&
   6655       N0.getOperand(0).getOperand(1).hasOneUse() &&
   6656       N1C) {
   6657     SDValue N0Op0 = N0.getOperand(0);
   6658     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
   6659       unsigned LargeShiftVal = LargeShift->getZExtValue();
   6660       EVT LargeVT = N0Op0.getValueType();
   6661 
   6662       if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
   6663         SDLoc DL(N);
   6664         SDValue Amt =
   6665           DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
   6666                           getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
   6667         SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
   6668                                   N0Op0.getOperand(0), Amt);
   6669         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
   6670       }
   6671     }
   6672   }
   6673 
   6674   // Simplify, based on bits shifted out of the LHS.
   6675   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
   6676     return SDValue(N, 0);
   6677 
   6678   // If the sign bit is known to be zero, switch this to a SRL.
   6679   if (DAG.SignBitIsZero(N0))
   6680     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
   6681 
   6682   if (N1C && !N1C->isOpaque())
   6683     if (SDValue NewSRA = visitShiftByConstant(N, N1C))
   6684       return NewSRA;
   6685 
   6686   return SDValue();
   6687 }
   6688 
   6689 SDValue DAGCombiner::visitSRL(SDNode *N) {
   6690   SDValue N0 = N->getOperand(0);
   6691   SDValue N1 = N->getOperand(1);
   6692   EVT VT = N0.getValueType();
   6693   unsigned OpSizeInBits = VT.getScalarSizeInBits();
   6694 
   6695   // fold vector ops
   6696   if (VT.isVector())
   6697     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   6698       return FoldedVOp;
   6699 
   6700   ConstantSDNode *N1C = isConstOrConstSplat(N1);
   6701 
   6702   // fold (srl c1, c2) -> c1 >>u c2
   6703   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   6704   if (N0C && N1C && !N1C->isOpaque())
   6705     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
   6706   // fold (srl 0, x) -> 0
   6707   if (isNullConstantOrNullSplatConstant(N0))
   6708     return N0;
   6709   // fold (srl x, c >= size(x)) -> undef
    6710   // NOTE: ALL vector shift amounts must be out of range to avoid partial UNDEFs.
   6711   auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
   6712     return Val->getAPIntValue().uge(OpSizeInBits);
   6713   };
   6714   if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
   6715     return DAG.getUNDEF(VT);
   6716   // fold (srl x, 0) -> x
   6717   if (N1C && N1C->isNullValue())
   6718     return N0;
   6719 
   6720   if (SDValue NewSel = foldBinOpIntoSelect(N))
   6721     return NewSel;
   6722 
   6723   // if (srl x, c) is known to be zero, return 0
   6724   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
   6725                                    APInt::getAllOnesValue(OpSizeInBits)))
   6726     return DAG.getConstant(0, SDLoc(N), VT);
   6727 
   6728   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
   6729   if (N0.getOpcode() == ISD::SRL) {
   6730     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
   6731                                           ConstantSDNode *RHS) {
   6732       APInt c1 = LHS->getAPIntValue();
   6733       APInt c2 = RHS->getAPIntValue();
   6734       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
   6735       return (c1 + c2).uge(OpSizeInBits);
   6736     };
   6737     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
   6738       return DAG.getConstant(0, SDLoc(N), VT);
   6739 
   6740     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
   6741                                        ConstantSDNode *RHS) {
   6742       APInt c1 = LHS->getAPIntValue();
   6743       APInt c2 = RHS->getAPIntValue();
   6744       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
   6745       return (c1 + c2).ult(OpSizeInBits);
   6746     };
   6747     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
   6748       SDLoc DL(N);
   6749       EVT ShiftVT = N1.getValueType();
   6750       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
   6751       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
   6752     }
   6753   }
   6754 
   6755   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
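           // e.g. with x:i32, (srl (trunc:i16 (srl x, 16)), 4)
           //      -> (trunc:i16 (srl x, 20)).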
   6756   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
   6757       N0.getOperand(0).getOpcode() == ISD::SRL) {
   6758     if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
   6759       uint64_t c1 = N001C->getZExtValue();
   6760       uint64_t c2 = N1C->getZExtValue();
   6761       EVT InnerShiftVT = N0.getOperand(0).getValueType();
   6762       EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
   6763       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
    6764       // This is only valid if OpSizeInBits + c1 == the size of the inner shift.
   6765       if (c1 + OpSizeInBits == InnerShiftSize) {
   6766         SDLoc DL(N0);
   6767         if (c1 + c2 >= InnerShiftSize)
   6768           return DAG.getConstant(0, DL, VT);
   6769         return DAG.getNode(ISD::TRUNCATE, DL, VT,
   6770                            DAG.getNode(ISD::SRL, DL, InnerShiftVT,
   6771                                        N0.getOperand(0).getOperand(0),
   6772                                        DAG.getConstant(c1 + c2, DL,
   6773                                                        ShiftCountVT)));
   6774       }
   6775     }
   6776   }
   6777 
   6778   // fold (srl (shl x, c), c) -> (and x, cst2)
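           // e.g. on i8, (srl (shl x, 3), 3) -> (and x, 0x1F).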
   6779   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
   6780       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
   6781     SDLoc DL(N);
   6782     SDValue Mask =
   6783         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
   6784     AddToWorklist(Mask.getNode());
   6785     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
   6786   }
   6787 
   6788   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
   6789   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
   6790     // Shifting in all undef bits?
   6791     EVT SmallVT = N0.getOperand(0).getValueType();
   6792     unsigned BitSize = SmallVT.getScalarSizeInBits();
   6793     if (N1C->getZExtValue() >= BitSize)
   6794       return DAG.getUNDEF(VT);
   6795 
   6796     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
   6797       uint64_t ShiftAmt = N1C->getZExtValue();
   6798       SDLoc DL0(N0);
   6799       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
   6800                                        N0.getOperand(0),
   6801                           DAG.getConstant(ShiftAmt, DL0,
   6802                                           getShiftAmountTy(SmallVT)));
   6803       AddToWorklist(SmallShift.getNode());
   6804       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
   6805       SDLoc DL(N);
   6806       return DAG.getNode(ISD::AND, DL, VT,
   6807                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
   6808                          DAG.getConstant(Mask, DL, VT));
   6809     }
   6810   }
   6811 
   6812   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
   6813   // bit, which is unmodified by sra.
   6814   if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
   6815     if (N0.getOpcode() == ISD::SRA)
   6816       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
   6817   }
   6818 
    6819   // fold (srl (ctlz x), "5") -> (xor x, 1) iff only the low bit of x can be set.
   6820   if (N1C && N0.getOpcode() == ISD::CTLZ &&
   6821       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
   6822     KnownBits Known;
   6823     DAG.computeKnownBits(N0.getOperand(0), Known);
   6824 
   6825     // If any of the input bits are KnownOne, then the input couldn't be all
   6826     // zeros, thus the result of the srl will always be zero.
   6827     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
   6828 
    6829     // If all of the bits input to the ctlz node are known to be zero, then
   6830     // the result of the ctlz is "32" and the result of the shift is one.
   6831     APInt UnknownBits = ~Known.Zero;
   6832     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
   6833 
   6834     // Otherwise, check to see if there is exactly one bit input to the ctlz.
   6835     if (UnknownBits.isPowerOf2()) {
    6836       // Okay, we know that only the single bit specified by UnknownBits
   6837       // could be set on input to the CTLZ node. If this bit is set, the SRL
   6838       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
   6839       // to an SRL/XOR pair, which is likely to simplify more.
   6840       unsigned ShAmt = UnknownBits.countTrailingZeros();
   6841       SDValue Op = N0.getOperand(0);
   6842 
   6843       if (ShAmt) {
   6844         SDLoc DL(N0);
   6845         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
   6846                   DAG.getConstant(ShAmt, DL,
   6847                                   getShiftAmountTy(Op.getValueType())));
   6848         AddToWorklist(Op.getNode());
   6849       }
   6850 
   6851       SDLoc DL(N);
   6852       return DAG.getNode(ISD::XOR, DL, VT,
   6853                          Op, DAG.getConstant(1, DL, VT));
   6854     }
   6855   }
   6856 
   6857   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
   6858   if (N1.getOpcode() == ISD::TRUNCATE &&
   6859       N1.getOperand(0).getOpcode() == ISD::AND) {
   6860     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
   6861       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
   6862   }
   6863 
   6864   // fold operands of srl based on knowledge that the low bits are not
   6865   // demanded.
   6866   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
   6867     return SDValue(N, 0);
   6868 
   6869   if (N1C && !N1C->isOpaque())
   6870     if (SDValue NewSRL = visitShiftByConstant(N, N1C))
   6871       return NewSRL;
   6872 
   6873   // Attempt to convert a srl of a load into a narrower zero-extending load.
   6874   if (SDValue NarrowLoad = ReduceLoadWidth(N))
   6875     return NarrowLoad;
   6876 
   6877   // Here is a common situation. We want to optimize:
   6878   //
   6879   //   %a = ...
   6880   //   %b = and i32 %a, 2
   6881   //   %c = srl i32 %b, 1
   6882   //   brcond i32 %c ...
   6883   //
   6884   // into
   6885   //
   6886   //   %a = ...
   6887   //   %b = and %a, 2
   6888   //   %c = setcc eq %b, 0
   6889   //   brcond %c ...
   6890   //
    6891   // However, after the source operand of SRL is optimized into AND, the SRL
   6892   // itself may not be optimized further. Look for it and add the BRCOND into
   6893   // the worklist.
   6894   if (N->hasOneUse()) {
   6895     SDNode *Use = *N->use_begin();
   6896     if (Use->getOpcode() == ISD::BRCOND)
   6897       AddToWorklist(Use);
   6898     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
    6899       // Also look past the truncate.
   6900       Use = *Use->use_begin();
   6901       if (Use->getOpcode() == ISD::BRCOND)
   6902         AddToWorklist(Use);
   6903     }
   6904   }
   6905 
   6906   return SDValue();
   6907 }
   6908 
   6909 SDValue DAGCombiner::visitABS(SDNode *N) {
   6910   SDValue N0 = N->getOperand(0);
   6911   EVT VT = N->getValueType(0);
   6912 
   6913   // fold (abs c1) -> c2
   6914   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
   6915     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
   6916   // fold (abs (abs x)) -> (abs x)
   6917   if (N0.getOpcode() == ISD::ABS)
   6918     return N0;
   6919   // fold (abs x) -> x iff not-negative
   6920   if (DAG.SignBitIsZero(N0))
   6921     return N0;
   6922   return SDValue();
   6923 }
   6924 
   6925 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
   6926   SDValue N0 = N->getOperand(0);
   6927   EVT VT = N->getValueType(0);
   6928 
   6929   // fold (bswap c1) -> c2
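           // e.g. (bswap:i32 0x12345678) -> 0x78563412.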
   6930   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
   6931     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
   6932   // fold (bswap (bswap x)) -> x
   6933   if (N0.getOpcode() == ISD::BSWAP)
   6934     return N0->getOperand(0);
   6935   return SDValue();
   6936 }
   6937 
   6938 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
   6939   SDValue N0 = N->getOperand(0);
   6940   EVT VT = N->getValueType(0);
   6941 
   6942   // fold (bitreverse c1) -> c2
   6943   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
   6944     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
   6945   // fold (bitreverse (bitreverse x)) -> x
   6946   if (N0.getOpcode() == ISD::BITREVERSE)
   6947     return N0.getOperand(0);
   6948   return SDValue();
   6949 }
   6950 
   6951 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
   6952   SDValue N0 = N->getOperand(0);
   6953   EVT VT = N->getValueType(0);
   6954 
   6955   // fold (ctlz c1) -> c2
   6956   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
   6957     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
   6958 
   6959   // If the value is known never to be zero, switch to the undef version.
   6960   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
   6961     if (DAG.isKnownNeverZero(N0))
   6962       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
   6963   }
   6964 
   6965   return SDValue();
   6966 }
   6967 
   6968 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
   6969   SDValue N0 = N->getOperand(0);
   6970   EVT VT = N->getValueType(0);
   6971 
   6972   // fold (ctlz_zero_undef c1) -> c2
   6973   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
   6974     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
   6975   return SDValue();
   6976 }
   6977 
   6978 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
   6979   SDValue N0 = N->getOperand(0);
   6980   EVT VT = N->getValueType(0);
   6981 
   6982   // fold (cttz c1) -> c2
   6983   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
   6984     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
   6985 
   6986   // If the value is known never to be zero, switch to the undef version.
   6987   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
   6988     if (DAG.isKnownNeverZero(N0))
   6989       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
   6990   }
   6991 
   6992   return SDValue();
   6993 }
   6994 
   6995 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
   6996   SDValue N0 = N->getOperand(0);
   6997   EVT VT = N->getValueType(0);
   6998 
   6999   // fold (cttz_zero_undef c1) -> c2
   7000   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
   7001     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
   7002   return SDValue();
   7003 }
   7004 
   7005 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
   7006   SDValue N0 = N->getOperand(0);
   7007   EVT VT = N->getValueType(0);
   7008 
   7009   // fold (ctpop c1) -> c2
   7010   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
   7011     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
   7012   return SDValue();
   7013 }
   7014 
   7015 /// Generate Min/Max node
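         /// e.g. (select (setolt x, y), x, y) -> (fminnum x, y) when FMINNUM is
         /// legal for the type.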
   7016 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
   7017                                    SDValue RHS, SDValue True, SDValue False,
   7018                                    ISD::CondCode CC, const TargetLowering &TLI,
   7019                                    SelectionDAG &DAG) {
   7020   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
   7021     return SDValue();
   7022 
   7023   switch (CC) {
   7024   case ISD::SETOLT:
   7025   case ISD::SETOLE:
   7026   case ISD::SETLT:
   7027   case ISD::SETLE:
   7028   case ISD::SETULT:
   7029   case ISD::SETULE: {
   7030     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
   7031     if (TLI.isOperationLegal(Opcode, VT))
   7032       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
   7033     return SDValue();
   7034   }
   7035   case ISD::SETOGT:
   7036   case ISD::SETOGE:
   7037   case ISD::SETGT:
   7038   case ISD::SETGE:
   7039   case ISD::SETUGT:
   7040   case ISD::SETUGE: {
   7041     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
   7042     if (TLI.isOperationLegal(Opcode, VT))
   7043       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
   7044     return SDValue();
   7045   }
   7046   default:
   7047     return SDValue();
   7048   }
   7049 }
   7050 
   7051 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
   7052   SDValue Cond = N->getOperand(0);
   7053   SDValue N1 = N->getOperand(1);
   7054   SDValue N2 = N->getOperand(2);
   7055   EVT VT = N->getValueType(0);
   7056   EVT CondVT = Cond.getValueType();
   7057   SDLoc DL(N);
   7058 
   7059   if (!VT.isInteger())
   7060     return SDValue();
   7061 
   7062   auto *C1 = dyn_cast<ConstantSDNode>(N1);
   7063   auto *C2 = dyn_cast<ConstantSDNode>(N2);
   7064   if (!C1 || !C2)
   7065     return SDValue();
   7066 
   7067   // Only do this before legalization to avoid conflicting with target-specific
   7068   // transforms in the other direction (create a select from a zext/sext). There
   7069   // is also a target-independent combine here in DAGCombiner in the other
   7070   // direction for (select Cond, -1, 0) when the condition is not i1.
   7071   if (CondVT == MVT::i1 && !LegalOperations) {
   7072     if (C1->isNullValue() && C2->isOne()) {
   7073       // select Cond, 0, 1 --> zext (!Cond)
   7074       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
   7075       if (VT != MVT::i1)
   7076         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
   7077       return NotCond;
   7078     }
   7079     if (C1->isNullValue() && C2->isAllOnesValue()) {
   7080       // select Cond, 0, -1 --> sext (!Cond)
   7081       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
   7082       if (VT != MVT::i1)
   7083         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
   7084       return NotCond;
   7085     }
   7086     if (C1->isOne() && C2->isNullValue()) {
   7087       // select Cond, 1, 0 --> zext (Cond)
   7088       if (VT != MVT::i1)
   7089         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
   7090       return Cond;
   7091     }
   7092     if (C1->isAllOnesValue() && C2->isNullValue()) {
   7093       // select Cond, -1, 0 --> sext (Cond)
   7094       if (VT != MVT::i1)
   7095         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
   7096       return Cond;
   7097     }
   7098 
   7099     // For any constants that differ by 1, we can transform the select into an
   7100     // extend and add. Use a target hook because some targets may prefer to
   7101     // transform in the other direction.
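             // e.g. (select Cond, 7, 6) -> (add (zext Cond), 6), and
             // (select Cond, 6, 7) -> (add (sext Cond), 7).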
   7102     if (TLI.convertSelectOfConstantsToMath(VT)) {
   7103       if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
   7104         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
   7105         if (VT != MVT::i1)
   7106           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
   7107         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
   7108       }
   7109       if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
   7110         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
   7111         if (VT != MVT::i1)
   7112           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
   7113         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
   7114       }
   7115     }
   7116 
   7117     return SDValue();
   7118   }
   7119 
   7120   // fold (select Cond, 0, 1) -> (xor Cond, 1)
   7121   // We can't do this reliably if integer based booleans have different contents
   7122   // to floating point based booleans. This is because we can't tell whether we
   7123   // have an integer-based boolean or a floating-point-based boolean unless we
   7124   // can find the SETCC that produced it and inspect its operands. This is
   7125   // fairly easy if C is the SETCC node, but it can potentially be
   7126   // undiscoverable (or not reasonably discoverable). For example, it could be
   7127   // in another basic block or it could require searching a complicated
   7128   // expression.
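           // e.g. with an i8 zero-or-one boolean Cond,
           // (select Cond, 0:i32, 1:i32) -> (zext (xor Cond, 1:i8)).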
   7129   if (CondVT.isInteger() &&
   7130       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
   7131           TargetLowering::ZeroOrOneBooleanContent &&
   7132       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
   7133           TargetLowering::ZeroOrOneBooleanContent &&
   7134       C1->isNullValue() && C2->isOne()) {
   7135     SDValue NotCond =
   7136         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
   7137     if (VT.bitsEq(CondVT))
   7138       return NotCond;
   7139     return DAG.getZExtOrTrunc(NotCond, DL, VT);
   7140   }
   7141 
   7142   return SDValue();
   7143 }
   7144 
   7145 SDValue DAGCombiner::visitSELECT(SDNode *N) {
   7146   SDValue N0 = N->getOperand(0);
   7147   SDValue N1 = N->getOperand(1);
   7148   SDValue N2 = N->getOperand(2);
   7149   EVT VT = N->getValueType(0);
   7150   EVT VT0 = N0.getValueType();
   7151   SDLoc DL(N);
   7152 
   7153   // fold (select C, X, X) -> X
   7154   if (N1 == N2)
   7155     return N1;
   7156 
   7157   if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
   7158     // fold (select true, X, Y) -> X
   7159     // fold (select false, X, Y) -> Y
   7160     return !N0C->isNullValue() ? N1 : N2;
   7161   }
   7162 
   7163   // fold (select X, X, Y) -> (or X, Y)
    7164   // fold (select X, 1, Y) -> (or X, Y)
   7165   if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
   7166     return DAG.getNode(ISD::OR, DL, VT, N0, N2);
   7167 
   7168   if (SDValue V = foldSelectOfConstants(N))
   7169     return V;
   7170 
   7171   // fold (select C, 0, X) -> (and (not C), X)
   7172   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
   7173     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
   7174     AddToWorklist(NOTNode.getNode());
   7175     return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
   7176   }
   7177   // fold (select C, X, 1) -> (or (not C), X)
   7178   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
   7179     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
   7180     AddToWorklist(NOTNode.getNode());
   7181     return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
   7182   }
   7183   // fold (select X, Y, X) -> (and X, Y)
   7184   // fold (select X, Y, 0) -> (and X, Y)
   7185   if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
   7186     return DAG.getNode(ISD::AND, DL, VT, N0, N1);
   7187 
   7188   // If we can fold this based on the true/false value, do so.
   7189   if (SimplifySelectOps(N, N1, N2))
   7190     return SDValue(N, 0); // Don't revisit N.
   7191 
   7192   if (VT0 == MVT::i1) {
   7193     // The code in this block deals with the following 2 equivalences:
   7194     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
   7195     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
   7196     // The target can specify its preferred form with the
    7197     // shouldNormalizeToSelectSequence() callback. However, we always transform
    7198     // to the right side if the inner select already exists in the DAG, and we
    7199     // always transform to the left side if we know that we can further
   7200     // optimize the combination of the conditions.
   7201     bool normalizeToSequence =
   7202         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
   7203     // select (and Cond0, Cond1), X, Y
   7204     //   -> select Cond0, (select Cond1, X, Y), Y
   7205     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
   7206       SDValue Cond0 = N0->getOperand(0);
   7207       SDValue Cond1 = N0->getOperand(1);
   7208       SDValue InnerSelect =
   7209           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
   7210       if (normalizeToSequence || !InnerSelect.use_empty())
   7211         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
   7212                            InnerSelect, N2);
   7213     }
   7214     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
   7215     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
   7216       SDValue Cond0 = N0->getOperand(0);
   7217       SDValue Cond1 = N0->getOperand(1);
   7218       SDValue InnerSelect =
   7219           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
   7220       if (normalizeToSequence || !InnerSelect.use_empty())
   7221         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
   7222                            InnerSelect);
   7223     }
   7224 
   7225     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
   7226     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
   7227       SDValue N1_0 = N1->getOperand(0);
   7228       SDValue N1_1 = N1->getOperand(1);
   7229       SDValue N1_2 = N1->getOperand(2);
   7230       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
   7231         // Create the actual and node if we can generate good code for it.
   7232         if (!normalizeToSequence) {
   7233           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
   7234           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
   7235         }
   7236         // Otherwise see if we can optimize the "and" to a better pattern.
   7237         if (SDValue Combined = visitANDLike(N0, N1_0, N))
   7238           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
   7239                              N2);
   7240       }
   7241     }
   7242     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
   7243     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
   7244       SDValue N2_0 = N2->getOperand(0);
   7245       SDValue N2_1 = N2->getOperand(1);
   7246       SDValue N2_2 = N2->getOperand(2);
   7247       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
   7248         // Create the actual or node if we can generate good code for it.
   7249         if (!normalizeToSequence) {
   7250           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
   7251           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
   7252         }
   7253         // Otherwise see if we can optimize to a better pattern.
   7254         if (SDValue Combined = visitORLike(N0, N2_0, N))
   7255           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
   7256                              N2_2);
   7257       }
   7258     }
   7259   }
   7260 
   7261   if (VT0 == MVT::i1) {
   7262     // select (not Cond), N1, N2 -> select Cond, N2, N1
   7263     if (isBitwiseNot(N0))
   7264       return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
   7265   }
   7266 
   7267   // fold selects based on a setcc into other things, such as min/max/abs
   7268   if (N0.getOpcode() == ISD::SETCC) {
    7269     // select (fcmp lt x, y), x, y -> fminnum x, y
    7270     // select (fcmp gt x, y), x, y -> fmaxnum x, y
   7271     //
   7272     // This is OK if we don't care about what happens if either operand is a
   7273     // NaN.
   7274     //
   7275 
   7276     // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
   7277     // no signed zeros as well as no nans.
   7278     const TargetOptions &Options = DAG.getTarget().Options;
   7279     if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
   7280         DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
   7281       ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
   7282 
   7283       if (SDValue FMinMax = combineMinNumMaxNum(
   7284               DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
   7285         return FMinMax;
   7286     }
   7287 
   7288     if ((!LegalOperations &&
   7289          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
   7290         TLI.isOperationLegal(ISD::SELECT_CC, VT))
   7291       return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0),
   7292                          N0.getOperand(1), N1, N2, N0.getOperand(2));
   7293     return SimplifySelect(DL, N0, N1, N2);
   7294   }
   7295 
   7296   return SDValue();
   7297 }
   7298 
   7299 static
   7300 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
   7301   SDLoc DL(N);
   7302   EVT LoVT, HiVT;
   7303   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
   7304 
   7305   // Split the inputs.
   7306   SDValue Lo, Hi, LL, LH, RL, RH;
   7307   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
   7308   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
   7309 
   7310   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
   7311   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
   7312 
   7313   return std::make_pair(Lo, Hi);
   7314 }
   7315 
   7316 // This function assumes all the vselect's arguments are CONCAT_VECTOR
   7317 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
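         // e.g. with 4 elements and a condition BV of <-1, -1, undef, 0>, the
         // bottom half selects from LHS and the top half from RHS, so the result
         // is (concat_vectors LHS.op0, RHS.op1).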
   7318 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
   7319   SDLoc DL(N);
   7320   SDValue Cond = N->getOperand(0);
   7321   SDValue LHS = N->getOperand(1);
   7322   SDValue RHS = N->getOperand(2);
   7323   EVT VT = N->getValueType(0);
   7324   int NumElems = VT.getVectorNumElements();
   7325   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
   7326          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
   7327          Cond.getOpcode() == ISD::BUILD_VECTOR);
   7328 
    7329   // CONCAT_VECTORS can take an arbitrary number of operands. We only care
    7330   // about the binary ones here.
   7331   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
   7332     return SDValue();
   7333 
   7334   // We're sure we have an even number of elements due to the
   7335   // concat_vectors we have as arguments to vselect.
    7336   // Skip BV elements until we find one that's not an UNDEF. Then keep
    7337   // looping until we get to half the length of the BV and check that all
    7338   // the non-undef nodes are the same.
   7339   ConstantSDNode *BottomHalf = nullptr;
   7340   for (int i = 0; i < NumElems / 2; ++i) {
   7341     if (Cond->getOperand(i)->isUndef())
   7342       continue;
   7343 
   7344     if (BottomHalf == nullptr)
   7345       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
   7346     else if (Cond->getOperand(i).getNode() != BottomHalf)
   7347       return SDValue();
   7348   }
   7349 
   7350   // Do the same for the second half of the BuildVector
   7351   ConstantSDNode *TopHalf = nullptr;
   7352   for (int i = NumElems / 2; i < NumElems; ++i) {
   7353     if (Cond->getOperand(i)->isUndef())
   7354       continue;
   7355 
   7356     if (TopHalf == nullptr)
   7357       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
   7358     else if (Cond->getOperand(i).getNode() != TopHalf)
   7359       return SDValue();
   7360   }
   7361 
   7362   assert(TopHalf && BottomHalf &&
   7363          "One half of the selector was all UNDEFs and the other was all the "
   7364          "same value. This should have been addressed before this function.");
   7365   return DAG.getNode(
   7366       ISD::CONCAT_VECTORS, DL, VT,
   7367       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
   7368       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
   7369 }
   7370 
   7371 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
   7372   if (Level >= AfterLegalizeTypes)
   7373     return SDValue();
   7374 
   7375   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
   7376   SDValue Mask = MSC->getMask();
   7377   SDValue Data  = MSC->getValue();
   7378   SDLoc DL(N);
   7379 
  // If the MSCATTER data type requires splitting and the mask is provided by a
  // SETCC, then split both nodes and their operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
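  // For instance (illustrative), a scatter whose data type must be split on
  // the target becomes two scatters of half width, each taking one half of
  // the split SETCC mask.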
  if (Mask.getOpcode() != ISD::SETCC)
    return SDValue();

  // Check if any splitting is required.
  if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
      TargetLowering::TypeSplitVector)
    return SDValue();
  SDValue MaskLo, MaskHi, Lo, Hi;
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));

  SDValue Chain = MSC->getChain();

  EVT MemoryVT = MSC->getMemoryVT();
  unsigned Alignment = MSC->getOriginalAlignment();

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  SDValue DataLo, DataHi;
  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);

  SDValue Scale = MSC->getScale();
  SDValue BasePtr = MSC->getBasePtr();
  SDValue IndexLo, IndexHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);

  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MSC->getPointerInfo(),
                         MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
                         Alignment, MSC->getAAInfo(), MSC->getRanges());

  SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
  Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
                            DL, OpsLo, MMO);

  SDValue OpsHi[] = { Chain, DataHi, MaskHi, BasePtr, IndexHi, Scale };
  Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
                            DL, OpsHi, MMO);

  AddToWorklist(Lo.getNode());
  AddToWorklist(Hi.getNode());

  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}

SDValue DAGCombiner::visitMSTORE(SDNode *N) {
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
  SDValue Mask = MST->getMask();
  SDValue Data = MST->getValue();
  EVT VT = Data.getValueType();
  SDLoc DL(N);

  // If the MSTORE data type requires splitting and the mask is provided by a
  // SETCC, then split both nodes and their operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() == ISD::SETCC) {
    // Check if any splitting is required.
    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
        TargetLowering::TypeSplitVector)
      return SDValue();

    SDValue MaskLo, MaskHi, Lo, Hi;
    std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

    SDValue Chain = MST->getChain();
    SDValue Ptr   = MST->getBasePtr();

    EVT MemoryVT = MST->getMemoryVT();
    unsigned Alignment = MST->getOriginalAlignment();

    // If the alignment is equal to the size of the whole vector, use half of
    // it for the second half of the store.
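    // For example (illustrative), a 64-byte-aligned v8i64 store splits into a
    // 64-byte-aligned low half and a high half at offset 32, which is only
    // guaranteed to be 32-byte aligned.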
    unsigned SecondHalfAlignment =
      (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;

    EVT LoMemVT, HiMemVT;
    std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

    SDValue DataLo, DataHi;
    std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);

    MachineMemOperand *MMO = DAG.getMachineFunction().
      getMachineMemOperand(MST->getPointerInfo(),
                           MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
                           Alignment, MST->getAAInfo(), MST->getRanges());

    Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
                            MST->isTruncatingStore(),
                            MST->isCompressingStore());

    Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
                                     MST->isCompressingStore());
    unsigned HiOffset = LoMemVT.getStoreSize();

    MMO = DAG.getMachineFunction().getMachineMemOperand(
        MST->getPointerInfo().getWithOffset(HiOffset),
        MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
        MST->getAAInfo(), MST->getRanges());

    Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
                            MST->isTruncatingStore(),
                            MST->isCompressingStore());

    AddToWorklist(Lo.getNode());
    AddToWorklist(Hi.getNode());

    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
  }
  return SDValue();
}

SDValue DAGCombiner::visitMGATHER(SDNode *N) {
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
  SDValue Mask = MGT->getMask();
  SDLoc DL(N);

  // If the MGATHER result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and their operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() != ISD::SETCC)
    return SDValue();

  EVT VT = N->getValueType(0);

  // Check if any splitting is required.
  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
      TargetLowering::TypeSplitVector)
    return SDValue();

  SDValue MaskLo, MaskHi, Lo, Hi;
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

  SDValue Src0 = MGT->getValue();
  SDValue Src0Lo, Src0Hi;
  std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);

  SDValue Chain = MGT->getChain();
  EVT MemoryVT = MGT->getMemoryVT();
  unsigned Alignment = MGT->getOriginalAlignment();

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  SDValue Scale = MGT->getScale();
  SDValue BasePtr = MGT->getBasePtr();
  SDValue Index = MGT->getIndex();
  SDValue IndexLo, IndexHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);

  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MGT->getPointerInfo(),
                         MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
                         Alignment, MGT->getAAInfo(), MGT->getRanges());

  SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo, Scale };
  Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
                           MMO);

  SDValue OpsHi[] = { Chain, Src0Hi, MaskHi, BasePtr, IndexHi, Scale };
  Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
                           MMO);

  AddToWorklist(Lo.getNode());
  AddToWorklist(Hi.getNode());

  // Build a factor node to remember that this load is independent of the
  // other one.
  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
                      Hi.getValue(1));

  // Switch anything that used the old chain to use the new one.
  DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);

  SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);

  SDValue RetOps[] = { GatherRes, Chain };
  return DAG.getMergeValues(RetOps, DL);
}

SDValue DAGCombiner::visitMLOAD(SDNode *N) {
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
  SDValue Mask = MLD->getMask();
  SDLoc DL(N);

  // If the MLOAD result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and their operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() == ISD::SETCC) {
    EVT VT = N->getValueType(0);

    // Check if any splitting is required.
    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
        TargetLowering::TypeSplitVector)
      return SDValue();

    SDValue MaskLo, MaskHi, Lo, Hi;
    std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

    SDValue Src0 = MLD->getSrc0();
    SDValue Src0Lo, Src0Hi;
    std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);

    EVT LoVT, HiVT;
    std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));

    SDValue Chain = MLD->getChain();
    SDValue Ptr   = MLD->getBasePtr();
    EVT MemoryVT = MLD->getMemoryVT();
    unsigned Alignment = MLD->getOriginalAlignment();

    // If the alignment is equal to the size of the whole vector, use half of
    // it for the second half of the load.
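    // For example (illustrative), a 64-byte-aligned v8i64 load splits into a
    // 64-byte-aligned low half and a high half at offset 32, which is only
    // guaranteed to be 32-byte aligned.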
    unsigned SecondHalfAlignment =
      (Alignment == MLD->getValueType(0).getSizeInBits() / 8) ?
         Alignment / 2 : Alignment;

    EVT LoMemVT, HiMemVT;
    std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

    MachineMemOperand *MMO = DAG.getMachineFunction().
      getMachineMemOperand(MLD->getPointerInfo(),
                           MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
                           Alignment, MLD->getAAInfo(), MLD->getRanges());

    Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
                           ISD::NON_EXTLOAD, MLD->isExpandingLoad());

    Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
                                     MLD->isExpandingLoad());
    unsigned HiOffset = LoMemVT.getStoreSize();

    MMO = DAG.getMachineFunction().getMachineMemOperand(
        MLD->getPointerInfo().getWithOffset(HiOffset),
        MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
        MLD->getAAInfo(), MLD->getRanges());

    Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
                           ISD::NON_EXTLOAD, MLD->isExpandingLoad());

    AddToWorklist(Lo.getNode());
    AddToWorklist(Hi.getNode());

    // Build a factor node to remember that this load is independent of the
    // other one.
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
                        Hi.getValue(1));

    // Switch anything that used the old chain to use the new one.
    DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);

    SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);

    SDValue RetOps[] = { LoadRes, Chain };
    return DAG.getMergeValues(RetOps, DL);
  }
  return SDValue();
}

/// A vector select of 2 constant vectors can be simplified to math/logic to
/// avoid a variable select instruction and possibly avoid constant loads.
SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
      !TLI.convertSelectOfConstantsToMath(VT) ||
      !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
      !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
    return SDValue();

  // Check if we can use the condition value to increment/decrement a single
  // constant value. This simplifies a select to an add and removes a constant
  // load/materialization from the general case.
  bool AllAddOne = true;
  bool AllSubOne = true;
  unsigned Elts = VT.getVectorNumElements();
  for (unsigned i = 0; i != Elts; ++i) {
    SDValue N1Elt = N1.getOperand(i);
    SDValue N2Elt = N2.getOperand(i);
    if (N1Elt.isUndef() || N2Elt.isUndef())
      continue;

    const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
    const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
    if (C1 != C2 + 1)
      AllAddOne = false;
    if (C1 != C2 - 1)
      AllSubOne = false;
  }

  // Further simplifications for the extra-special cases where the constants
  // are all 0 or all -1 should be implemented as folds of these patterns.
  SDLoc DL(N);
  if (AllAddOne || AllSubOne) {
    // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
    // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
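    // For example (illustrative):
    //   vselect <4 x i1> Cond, <3,3,3,3>, <2,2,2,2> --> add (zext Cond), <2,2,2,2>
    // because zext i1 yields 0 or 1 per lane.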
    auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
    SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
    return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
  }

  // The general case for select-of-constants:
  // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
  // ...but that only makes sense if a vselect is slower than 2 logic ops, so
  // leave that to a machine-specific pass.
  return SDValue();
}

SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDLoc DL(N);

  // fold (vselect C, X, X) -> X
  if (N1 == N2)
    return N1;

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      EVT VT = LHS.getValueType();
      if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
        return DAG.getNode(ISD::ABS, DL, VT, LHS);

      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }

    // If this select has a condition (setcc) with narrower operands than the
    // select, try to widen the compare to match the select width.
    // TODO: This should be extended to handle any constant.
    // TODO: This could be extended to handle non-loading patterns, but that
    //       requires thorough testing to avoid regressions.
    if (isNullConstantOrNullSplatConstant(RHS)) {
      EVT NarrowVT = LHS.getValueType();
      EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
      EVT SetCCVT = getSetCCResultType(LHS.getValueType());
      unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
      unsigned WideWidth = WideVT.getScalarSizeInBits();
      bool IsSigned = isSignedIntSetCC(CC);
      auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
          SetCCWidth != 1 && SetCCWidth < WideWidth &&
          TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
          TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
        // Both compare operands can be widened for free. The LHS can use an
        // extended load, and the RHS is a constant:
        //   vselect (ext (setcc load(X), C)), N1, N2 -->
        //   vselect (setcc extload(X), C'), N1, N2
        auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
        SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
        SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
        EVT WideSetCCVT = getSetCCResultType(WideVT);
        SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
        return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
      }
    }
  }

  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // ConvertSelectToConcatVector assumes both of the above folds for
  // (vselect (build_vector all_ones)) and (vselect (build_vector all_zeros))
  // have already been applied.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
      return CV;
  }

  if (SDValue V = foldVSelectOfConstants(N))
    return V;

  return SDValue();
}

SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue N3 = N->getOperand(3);
  SDValue N4 = N->getOperand(4);
  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();

  // fold select_cc lhs, rhs, x, x, cc -> x
  if (N2 == N3)
    return N2;

  // Determine if the condition we're dealing with is constant
  if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
                                  CC, SDLoc(N), false)) {
    AddToWorklist(SCC.getNode());

    if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
      if (!SCCC->isNullValue())
        return N2;    // cond always true -> true val
      else
        return N3;    // cond always false -> false val
    } else if (SCC->isUndef()) {
      // When the condition is UNDEF, just return the first operand. This is
      // consistent with DAG creation: no setcc node is created in that case.
      return N2;
    } else if (SCC.getOpcode() == ISD::SETCC) {
      // Fold to a simpler select_cc
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
                         SCC.getOperand(0), SCC.getOperand(1), N2, N3,
                         SCC.getOperand(2));
    }
  }

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N2, N3))
    return SDValue(N, 0);  // Don't revisit N.

  // fold select_cc into other things, such as min/max/abs
  return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
}

SDValue DAGCombiner::visitSETCC(SDNode *N) {
  // setcc is very commonly used as an argument to brcond. This pattern
  // also lends itself to numerous combines and, as a result, it is desirable
  // to keep the argument to a brcond as a setcc as much as possible.
  bool PreferSetCC =
      N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;

  SDValue Combined = SimplifySetCC(
      N->getValueType(0), N->getOperand(0), N->getOperand(1),
      cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);

  if (!Combined)
    return SDValue();

  // If we prefer to have a setcc, and we don't, we'll try our best to
  // recreate one using rebuildSetCC.
  if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
    SDValue NewSetCC = rebuildSetCC(Combined);

    // We don't have anything interesting to combine to.
    if (NewSetCC.getNode() == N)
      return SDValue();

    if (NewSetCC)
      return NewSetCC;
  }

  return Combined;
}

SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Carry = N->getOperand(2);
  SDValue Cond = N->getOperand(3);

  // If Carry is false, fold to a regular SETCC.
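  // That is: (setcccarry x, y, 0, cc) --> (setcc x, y, cc).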
  if (isNullConstant(Carry))
    return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);

  return SDValue();
}

/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
/// Vector extends are not folded if operations are legal; this is to
/// avoid introducing illegal build_vector dag nodes.
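/// For example (illustrative):
///   (zext (v2i8 build_vector <1, 2>) to v2i32) --> (v2i32 build_vector <1, 2>)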
static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
                                         SelectionDAG &DAG, bool LegalTypes,
                                         bool LegalOperations) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
          Opcode == ISD::ANY_EXTEND ||
          Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) &&
         "Expected EXTEND dag node in input!");

  // fold (sext c1) -> c1
  // fold (zext c1) -> c1
  // fold (aext c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();

  // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
  // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
  // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
  EVT SVT = VT.getScalarType();
  if (!(VT.isVector() &&
        (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
        ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
    return nullptr;

  // We can fold this node into a build_vector.
  unsigned VTBits = SVT.getSizeInBits();
  unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
  SmallVector<SDValue, 8> Elts;
  unsigned NumElts = VT.getVectorNumElements();
  SDLoc DL(N);

  for (unsigned i = 0; i != NumElts; ++i) {
    SDValue Op = N0->getOperand(i);
    if (Op->isUndef()) {
      Elts.push_back(DAG.getUNDEF(SVT));
      continue;
    }

    SDLoc DL(Op);
    // Get the constant value and if needed trunc it to the size of the type.
    // Nodes like build_vector might have constants wider than the scalar type.
    APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
    if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
      Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
    else
      Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
  }

  return DAG.getBuildVector(VT, DL, Elts).getNode();
}

// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable the
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if the extensions are possible and the
// above-mentioned transformation is profitable.
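// For instance (illustrative), if (load x) is also used by
// (setcc (load x), C), that setcc can later be rewritten to compare the
// extended load against the extended constant, so the original load dies.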
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
                                    unsigned ExtOpc,
                                    SmallVectorImpl<SDNode *> &ExtendNodes,
                                    const TargetLowering &TLI) {
  bool HasCopyToRegUses = false;
  bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
                            UE = N0.getNode()->use_end();
       UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == N)
      continue;
    if (UI.getUse().getResNo() != N0.getResNo())
      continue;
    // FIXME: Only extend SETCC N, N and SETCC N, c for now.
    if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
      ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
      if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
        // Sign bits will be lost after a zext.
        return false;
      bool Add = false;
      for (unsigned i = 0; i != 2; ++i) {
        SDValue UseOp = User->getOperand(i);
        if (UseOp == N0)
          continue;
        if (!isa<ConstantSDNode>(UseOp))
          return false;
        Add = true;
      }
      if (Add)
        ExtendNodes.push_back(User);
      continue;
    }
    // If truncates aren't free and there are users we can't
    // extend, it isn't worthwhile.
    if (!isTruncFree)
      return false;
    // Remember if this value is live-out.
    if (User->getOpcode() == ISD::CopyToReg)
      HasCopyToRegUses = true;
  }

  if (HasCopyToRegUses) {
    bool BothLiveOut = false;
    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
        BothLiveOut = true;
        break;
      }
    }
    if (BothLiveOut)
      // Both unextended and extended values are live out. There had better be
      // a good reason for the transformation.
      return !ExtendNodes.empty();
  }
  return true;
}

void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                                  SDValue OrigLoad, SDValue ExtLoad,
                                  ISD::NodeType ExtType) {
  // Extend SetCC uses if necessary.
  SDLoc DL(ExtLoad);
  for (SDNode *SetCC : SetCCs) {
    SmallVector<SDValue, 4> Ops;

    for (unsigned j = 0; j != 2; ++j) {
      SDValue SOp = SetCC->getOperand(j);
      if (SOp == OrigLoad)
        Ops.push_back(ExtLoad);
      else
        Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
    }

    Ops.push_back(SetCC->getOperand(2));
    CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
  }
}

// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 16)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                            (v4i32 (sextload (x + 16)))))))
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);

  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
      !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types.
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }

  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  SDLoc DL(N);
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  const unsigned Stride = SplitSrcVT.getStoreSize();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    const unsigned Align = MinAlign(LN0->getAlignment(), Offset);

    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());

    BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(Stride, DL, BasePtr.getValueType()));

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  // Simplify the TokenFactor.
  AddToWorklist(NewChain.getNode());

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
  CombineTo(N0.getNode(), Trunc, NewChain);
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}

// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
//      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
  assert(N->getOpcode() == ISD::ZERO_EXTEND);
  EVT VT = N->getValueType(0);

  // and/or/xor
  SDValue N0 = N->getOperand(0);
  if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
        N0.getOpcode() == ISD::XOR) ||
      N0.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
    return SDValue();

  // shl/shr
  SDValue N1 = N0->getOperand(0);
  if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
      N1.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
    return SDValue();

  // load
  if (!isa<LoadSDNode>(N1.getOperand(0)))
    return SDValue();
  LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
  EVT MemVT = Load->getMemoryVT();
  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
      Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
    return SDValue();

  // If the shift op is SHL, the logic op must be AND, otherwise the result
  // will be wrong: in the narrow type, SHL discards bits shifted above the
  // top, but in the wide type those bits survive, and only an AND with the
  // zero-extended mask is guaranteed to clear them.
  if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
    return SDValue();

  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
                               ISD::ZERO_EXTEND, SetCCs, TLI))
    return SDValue();

  // Actually do the transformation.
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
                                   Load->getChain(), Load->getBasePtr(),
                                   Load->getMemoryVT(), Load->getMemOperand());

  SDLoc DL1(N1);
  SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
                              N1.getOperand(1));

  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
  Mask = Mask.zext(VT.getSizeInBits());
  SDLoc DL0(N0);
  SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
                            DAG.getConstant(Mask, DL0, VT));

  ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
  CombineTo(N, And);
  if (SDValue(Load, 0).hasOneUse()) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
  } else {
    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
                                Load->getValueType(0), ExtLoad);
    CombineTo(Load, Trunc, ExtLoad.getValue(1));
  }
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}

/// If we're narrowing or widening the result of a vector select and the final
/// size is the same size as a setcc (compare) feeding the select, then try to
/// apply the cast operation to the select's operands because matching vector
/// sizes for a select condition and other operands should be more efficient.
SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
  unsigned CastOpcode = Cast->getOpcode();
  assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
          CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
          CastOpcode == ISD::FP_ROUND) &&
         "Unexpected opcode for vector select narrowing/widening");

  // We only do this transform before legal ops because the pattern may be
  // obfuscated by target-specific operations after legalization. Do not create
  // an illegal select op, however, because that may be difficult to lower.
  EVT VT = Cast->getValueType(0);
  if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
    return SDValue();

  SDValue VSel = Cast->getOperand(0);
  if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
      VSel.getOperand(0).getOpcode() != ISD::SETCC)
    return SDValue();

  // Does the setcc have the same vector size as the casted select?
  SDValue SetCC = VSel.getOperand(0);
  EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
  if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
    return SDValue();

  // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
  SDValue A = VSel.getOperand(1);
  SDValue B = VSel.getOperand(2);
  SDValue CastA, CastB;
  SDLoc DL(Cast);
  if (CastOpcode == ISD::FP_ROUND) {
    // FP_ROUND (fptrunc) has an extra flag operand to pass along.
    CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
    CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
  } else {
    CastA = DAG.getNode(CastOpcode, DL, VT, A);
    CastB = DAG.getNode(CastOpcode, DL, VT, B);
  }
  return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
}

// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
                                     const TargetLowering &TLI, EVT VT,
                                     bool LegalOperations, SDNode *N,
                                     SDValue N0, ISD::LoadExtType ExtLoadType) {
  SDNode *N0Node = N0.getNode();
  bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
                                                   : ISD::isZEXTLoad(N0Node);
  if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
      !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
    return {};

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  EVT MemVT = LN0->getMemoryVT();
  if ((LegalOperations || LN0->isVolatile()) &&
      !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
    return {};

  SDValue ExtLoad =
      DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
                     LN0->getBasePtr(), MemVT, LN0->getMemOperand());
  Combiner.CombineTo(N, ExtLoad);
  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}

// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// Only generate vector extloads when 1) they're legal, and 2) they are
// deemed desirable by the target.
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
                                  const TargetLowering &TLI, EVT VT,
                                  bool LegalOperations, SDNode *N, SDValue N0,
                                  ISD::LoadExtType ExtLoadType,
                                  ISD::NodeType ExtOpc) {
  if (!ISD::isNON_EXTLoad(N0.getNode()) ||
      !ISD::isUNINDEXEDLoad(N0.getNode()) ||
      ((LegalOperations || VT.isVector() ||
        cast<LoadSDNode>(N0)->isVolatile()) &&
       !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
    return {};

  bool DoXform = true;
  SmallVector<SDNode *, 4> SetCCs;
  if (!N0.hasOneUse())
    DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
  if (VT.isVector())
    DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
  if (!DoXform)
    return {};

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
                                   LN0->getBasePtr(), N0.getValueType(),
                                   LN0->getMemOperand());
  Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
  // If the load value is used only by N, replace it via CombineTo N.
  bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
  Combiner.CombineTo(N, ExtLoad);
  if (NoReplaceTrunc) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
  } else {
    SDValue Trunc =
        DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
    Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
  }
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}

static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
                                       bool LegalOperations) {
  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");

  SDValue SetCC = N->getOperand(0);
  if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
      !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
    return SDValue();

  SDValue X = SetCC.getOperand(0);
  SDValue Ones = SetCC.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
  EVT VT = N->getValueType(0);
  EVT XVT = X.getValueType();
  // setge X, C is canonicalized to setgt, so we do not need to match that
  // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
  // not require the 'not' op.
  if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
    // Invert and smear/shift the sign bit:
    // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
    // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
    SDLoc DL(N);
    SDValue NotX = DAG.getNOT(DL, X, VT);
    SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
    auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
    return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
  }
  return SDValue();
}

SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended.  If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getScalarValueSizeInBits();
    unsigned MidBits  = N0.getScalarValueSizeInBits();
    unsigned DestBits = VT.getScalarSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, the value is already sign-extended and the pair can be dropped.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // Try to simplify (sext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::SEXTLOAD, ISD::SIGN_EXTEND))
    return foldedExt;

  // fold (sext (load x)) to multiple smaller sextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // Try to simplify (sext (sextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
    return foldedExt;

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
      SmallVector<SDNode *, 4> SetCCs;
      bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                             ISD::SIGN_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    }
  }

  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (N0.getOpcode() == ISD::SETCC) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    EVT N00VT = N0.getOperand(0).getValueType();

    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N00VT) ==
            TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N00VT);

      // We know that the # elements of the result is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter). Check to see that they are the same size. If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == SVT.getSizeInBits())
        return DAG.getSetCC(DL, VT, N00, N01, CC);

      // If the desired elements are smaller or larger than the source
      // elements, we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
      if (SVT == MatchingVecType) {
        SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
        return DAG.getSExtOrTrunc(VsetCC, DL, VT);
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
    // Here, T can be 1 or -1, depending on the type of the setcc and
    // getBooleanContents().
    unsigned SetCCWidth = N0.getScalarValueSizeInBits();

    // To determine the "true" side of the select, we need to know the high bit
    // of the value returned by the setcc if it evaluates to true.
    // If the type of the setcc is i1, then the true case of the select is just
    // sext(i1 1), that is, -1.
    // If the type of the setcc is larger (say, i8) then the value of the high
    // bit depends on getBooleanContents(), so ask TLI for a real "true" value
    // of the appropriate width.
    SDValue ExtTrueVal = (SetCCWidth == 1)
                             ? DAG.getAllOnesConstant(DL, VT)
                             : DAG.getBoolConstant(true, DL, VT, N00VT);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    if (SDValue SCC =
            SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
      return SCC;

    if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
      EVT SetCCVT = getSetCCResultType(N00VT);
      // Don't do this transform for i1 because there's a select transform
      // that would reverse it.
      // TODO: We should not do this transform at all without a target hook
      // because a sext is likely cheaper than a select?
      if (SetCCVT.getScalarSizeInBits() != 1 &&
          (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
        SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
        return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}

// isTruncateOf - If N is a truncate of some other value, return true and
// record the value being truncated in Op and which of Op's bits are zero/one
// in Known. This function computes KnownBits to avoid a duplicated call to
// computeKnownBits in the caller.
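// For example (illustrative only): (setcc i32 %x, 0, setne) with an i1 result,
// where every bit of %x above bit 0 is known zero, behaves exactly like
// (truncate i32 %x to i1), so Op is set to %x.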
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
                         KnownBits &Known) {
  if (N->getOpcode() == ISD::TRUNCATE) {
    Op = N->getOperand(0);
    DAG.computeKnownBits(Op, Known);
    return true;
  }

  if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
      cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
    return false;

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  assert(Op0.getValueType() == Op1.getValueType());

  if (isNullConstant(Op0))
    Op = Op1;
  else if (isNullConstant(Op1))
    Op = Op0;
  else
    return false;

  DAG.computeKnownBits(Op, Known);

  if (!(Known.Zero | 1).isAllOnesValue())
    return false;

  return true;
}

SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
  // FIXME: We should extend this to work for vectors too.
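  // Illustrative example (types chosen only for exposition):
  //   (zext (truncate i32 %x to i16) to i32) -> %x
  // when bits 16..31 of %x are known to be zero.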
  SDValue Op;
  KnownBits Known;
  if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
    APInt TruncatedBits =
      (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
      APInt(Op.getValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getValueSizeInBits(),
                        N0.getValueSizeInBits(),
                        std::min(Op.getValueSizeInBits(),
                                 VT.getSizeInBits()));
    if (TruncatedBits.isSubsetOf(Known.Zero))
      return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
  }

  // fold (zext (truncate x)) -> (and x, mask)
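  // Illustrative example: (zext (truncate i32 %x to i8) to i32)
  // -> (and i32 %x, 255), with i32/i8 chosen only for exposition.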
  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }

    EVT SrcVT = N0.getOperand(0).getValueType();
    EVT MinVT = N0.getValueType();

    // Try to mask before the extension to avoid having to generate a larger mask,
    // possibly over several sub-vectors.
    if (SrcVT.bitsLT(VT) && VT.isVector()) {
      if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
                               TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
        SDValue Op = N0.getOperand(0);
        Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
        AddToWorklist(Op.getNode());
        SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
        // Transfer the debug info; the new node is equivalent to N0.
        DAG.transferDbgValues(N0, ZExtOrTrunc);
        return ZExtOrTrunc;
      }
    }

    if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
      SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
      AddToWorklist(Op.getNode());
      SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
      // We may safely transfer the debug info describing the truncate node over
      // to the equivalent and operation.
      DAG.transferDbgValues(N0, And);
      return And;
    }
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
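  // For example (illustrative): (zext (and (trunc i64 %x to i32), 15) to i64)
  // -> (and i64 %x, 15), where the mask constant is zero-extended to i64.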
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // Try to simplify (zext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
    return foldedExt;

  // fold (zext (load x)) to multiple smaller zextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // Unless (and (load x) cst) will match as a zextload already and has
  // additional users.
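  // Illustrative example (types chosen only for exposition):
  //   (zext (or (i16 (load %p)), 7) to i32)
  //   -> (or (i32 (zextload %p, i16)), 7)
  // so the extension is folded away into the load.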
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse()) {
        if (N0.getOpcode() == ISD::AND) {
          auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
          EVT LoadResultTy = AndC->getValueType(0);
          EVT ExtVT;
          if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
            DoXform = false;
        }
      }
      if (DoXform)
        DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                          ISD::ZERO_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDLoc DL(N);
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
  //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
    return ZExtLoad;

  // Try to simplify (zext (zextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
    return foldedExt;

  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (N0.getOpcode() == ISD::SETCC) {
    // Only do this before legalize for now.
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N00VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the result is the same as the #
      // elements of the compare (and the # elements of the compare result for
      // that matter). Check to see that they are the same size. If so, we know
      // that the element size of the zext'd result matches the element size of
      // the compare operands.
      SDLoc DL(N);
      SDValue VecOnes = DAG.getConstant(1, DL, VT);
      if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
        // zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
        SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
                                     N0.getOperand(1), N0.getOperand(2));
        return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
      }

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
          DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1), N0.getOperand(2));
      return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
                         VecOnes);
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
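  // For example (illustrative types):
  //   (zext (shl (zext i8 %x to i16), 3) to i32)
  //   -> (shl (zext i8 %x to i32), 3)
  // provided the inner shift cannot shift out any set bits.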
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
        InnerZExt.getOperand(0).getValueSizeInBits();
      if (ShAmtVal > KnownZeroBits)
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}

SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDLoc DL(N);
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, DL, VT);
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
                                        TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      bool NoReplaceTrunc = N0.hasOneUse();
      CombineTo(N, ExtLoad);
      if (NoReplaceTrunc) {
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      } else {
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                    N0.getValueType(), ExtLoad);
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N00VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the result is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the extended result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N00VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                             N0.getOperand(1),
                             cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      else {
        EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
        SDValue VsetCC =
          DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                        N0.getOperand(1),
                        cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  return SDValue();
}

SDValue DAGCombiner::visitAssertExt(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT AssertVT = cast<VTSDNode>(N1)->getVT();

  // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
  if (N0.getOpcode() == Opcode &&
      AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
    return N0;

  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == Opcode) {
    // We have an assert, truncate, assert sandwich. Make one stronger assert
    // by asserting on the smallest asserted type to the larger source type.
    // This eliminates the later assert:
    // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
    // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
    SDValue BigA = N0.getOperand(0);
    EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
    assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
           "Asserting zero/sign-extended bits to a type larger than the "
           "truncated destination does not provide information");

    SDLoc DL(N);
    EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
    SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
    SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
                                    BigA.getOperand(0), MinAssertVTVal);
    return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
  }

  return SDValue();
}

/// If the result of a wider load is shifted right by N bits and then
/// truncated to a narrower type, where N is a multiple of the number of bits
/// in the narrower type, transform it to a narrower load from address +
/// N / (num of bits in the new type). Also narrow the load if the result is
/// masked with an AND to effectively produce a smaller type. If the result is
/// to be extended, also fold the extension to form an extending load.
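/// Illustrative example on a little-endian target (types for exposition only):
///   (i16 (truncate (srl (i32 (load %p)), 16)))
/// becomes (i16 (load %p + 2)), reading only the upper half of the word.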
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extending to VT.
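  // e.g. (sext_in_reg i32 %x, i8) behaves like (sext (trunc %x to i8) to i32),
  // so a load feeding it can be narrowed to an i8 sextload (illustrative).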
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special case: SRL is basically zero-extending a narrower value,
    // or it may be shifting a higher subword, half or byte into the lowest
    // bits.
    ExtType = ISD::ZEXTLOAD;
    N0 = SDValue(N, 0);

    auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
    auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01 || !LN0)
      return SDValue();

    uint64_t ShiftAmt = N01->getZExtValue();
    uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
    if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
    else
      ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                                VT.getSizeInBits() - ShiftAmt);
  } else if (Opc == ISD::AND) {
    // An AND with a constant mask is the same as a truncate + zero-extend.
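    // e.g. (and i32 %x, 255) acts as (zext (trunc i32 %x to i8) to i32), so a
    // load feeding it can be matched as an i8 zextload (illustrative types).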
    auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!AndC || !AndC->getAPIntValue().isMask())
      return SDValue();

    unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
    ExtType = ISD::ZEXTLOAD;
    ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  }

  unsigned ShAmt = 0;
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    SDValue SRL = N0;
    if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
      ShAmt = ConstShift->getZExtValue();
      unsigned EVTBits = ExtVT.getSizeInBits();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      auto *LN0 = cast<LoadSDNode>(N0);

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (LN0->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
        return SDValue();

      // If the SRL is only used by a masking AND, we may be able to adjust
      // the ExtVT to make the AND redundant.
      SDNode *Mask = *(SRL->use_begin());
      if (Mask->getOpcode() == ISD::AND &&
          isa<ConstantSDNode>(Mask->getOperand(1))) {
        const APInt &ShiftMask =
          cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
        if (ShiftMask.isMask()) {
          EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
                                           ShiftMask.countTrailingOnes());
          // If the mask is smaller, recompute the type.
          if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
              TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
            ExtVT = MaskedVT;
        }
      }
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.
  if (!isa<LoadSDNode>(N0))
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
    return SDValue();

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (DAG.getDataLayout().isBigEndian()) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
  }

  EVT PtrType = N0.getOperand(1).getValueType();
  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDLoc DL(LN0);
  // The original load itself didn't wrap, so an offset within it doesn't.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, DL, PtrType),
                               Flags);
  AddToWorklist(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                          NewAlign, LN0->getMemOperand()->getFlags(),
                          LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    SDLoc DL(N0);
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT,
                          Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  // Return the new loaded value.
  return Result;
}

SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarSizeInBits();
  unsigned EVTBits = EVT.getScalarSizeInBits();

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // fold (sext_in_reg c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getScalarValueSizeInBits() <= EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
  if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
      N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
      return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
  }

  // fold (sext_in_reg (zext x)) -> (sext x)
  // iff we are extending the source sign bit.
  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getScalarValueSizeInBits() == EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
          return DAG.getNode(ISD::SRA, SDLoc(N), VT,
                             N0.getOperand(0), N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  // If sextload is not supported by target, we can only do the combine when
  // load has one use. Doing otherwise can block folding the extload with other
  // extends that the target does support.
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
        N0.hasOneUse()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    AddToWorklist(ExtLoad.getNode());
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         BSwap, N1);
  }

  return SDValue();
}

SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  return SDValue();
}

SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  return SDValue();
}

SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool isLE = DAG.getDataLayout().isLittleEndian();

  // noop truncate
  if (N0.getValueType() == N->getValueType(0))
    return N0;

  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));

  // fold (truncate c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
    if (C.getNode() != N)
      return C;
  }

  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    // if the source is smaller than the dest, we still need an extend.
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, then we just need the truncate.
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
    return SDValue();

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    unsigned NumElem = VecTy.getVectorNumElements();
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();

    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));

      SDLoc DL(N);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
                         DAG.getBitcast(NVT, N0.getOperand(0)),
                         DAG.getConstant(Index, DL, IndexTy));
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
    EVT SrcVT = N0.getValueType();
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // trunc (shl x, K) -> shl (trunc x), K, provided K < VT.getScalarSizeInBits()
  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
      TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
    SDValue Amt = N0.getOperand(1);
    KnownBits Known;
    DAG.computeKnownBits(Amt, Known);
    unsigned Size = VT.getScalarSizeInBits();
    if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
      SDLoc SL(N);
      EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());

      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
      if (AmtVT != Amt.getValueType()) {
        Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
        AddToWorklist(Amt.getNode());
      }
      return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
    }
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {
    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts =  BuildVect.getNumOperands();
      unsigned TruncVecNumElts = VT.getVectorNumElements();
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;

      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
             "Invalid number of elements");

      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
        Opnds.push_back(BuildVect.getOperand(i));

      return DAG.getBuildVector(VT, SDLoc(N), Opnds);
    }
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" -> trunc y
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    APInt Mask =
        APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
    if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
  }

  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    if (SDValue Reduced = ReduceLoadWidth(N))
      return Reduced;

    // Handle the case where the load remains an extending load even
    // after truncation.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      if (!LN0->isVolatile() &&
          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                         VT, LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }

  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (!X.isUndef()) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one member is non-undef.
      if (NumDefs > 1)
        break;
      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                     VT.getVectorElementType(),
                                     X.getValueType().getVectorNumElements()));
    }

    if (NumDefs == 0)
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
        if (i != Idx) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
          continue;
        }
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
        AddToWorklist(NV.getNode());
        Opnds.push_back(NV);
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
    }
  }

  // Fold truncate of a bitcast of a vector to an extract of the low vector
  // element.
  //
  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
    SDValue VecSrc = N0.getOperand(0);
    EVT SrcVT = VecSrc.getValueType();
    if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
      SDLoc SL(N);

      EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
      unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
                         VecSrc, DAG.getConstant(Idx, SL, IdxVT));
    }
  }

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
  // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
  // When the adde's carry is not used.
  if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
      N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
      (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
    SDLoc SL(N);
    auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
    auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
    auto VTs = DAG.getVTList(VT, N0->getValueType(1));
    return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
  }

  // fold (truncate (extract_subvector(ext x))) ->
  //      (extract_subvector x)
  // TODO: This can be generalized to cover cases where the truncate and extract
  // do not fully cancel each other out.
  if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::SIGN_EXTEND ||
        N00.getOpcode() == ISD::ZERO_EXTEND ||
        N00.getOpcode() == ISD::ANY_EXTEND) {
      if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
          VT.getVectorElementType())
        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
                           N00.getOperand(0), N0.getOperand(1));
    }
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}

static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
  SDValue Elt = N->getOperand(i);
  if (Elt.getOpcode() != ISD::MERGE_VALUES)
    return Elt.getNode();
  return Elt.getOperand(Elt.getResNo()).getNode();
}

/// build_pair (load, load) -> load
/// if load locations are consecutive.
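/// Illustrative example (little-endian, i32/i64 chosen for exposition only):
///   (build_pair (i32 load %p), (i32 load %p+4)) -> (i64 load %p)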
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));

  // A BUILD_PAIR always has the least significant part in elt 0 and the
  // most significant part in elt 1. So when combining into one large load, we
  // need to consider the endianness.
  if (DAG.getDataLayout().isBigEndian())
    std::swap(LD1, LD2);

  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
      LD1->getAddressSpace() != LD2->getAddressSpace())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);
  unsigned LD1Bytes = LD1VT.getStoreSize();
  if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
      DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
    unsigned Align = LD1->getAlignment();
    unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
        VT.getTypeForEVT(*DAG.getContext()));

    if (NewAlign <= Align &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
                         LD1->getPointerInfo(), Align);
  }

  return SDValue();
}

static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
  // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
  // and Lo parts; on big-endian machines it doesn't.
  return DAG.getDataLayout().isBigEndian() ? 1 : 0;
}

static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
                                    const TargetLowering &TLI) {
  // If this is not a bitcast to an FP type or if the target doesn't have
  // IEEE754-compliant FP logic, we're done.
  EVT VT = N->getValueType(0);
  if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
    return SDValue();

  // TODO: Use splat values for the constant-checking below and remove this
  // restriction.
  SDValue N0 = N->getOperand(0);
  EVT SourceVT = N0.getValueType();
  if (SourceVT.isVector())
    return SDValue();

  unsigned FPOpcode;
  APInt SignMask;
  switch (N0.getOpcode()) {
  case ISD::AND:
    FPOpcode = ISD::FABS;
    SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits());
    break;
  case ISD::XOR:
    FPOpcode = ISD::FNEG;
    SignMask = APInt::getSignMask(SourceVT.getSizeInBits());
    break;
  // TODO: ISD::OR --> ISD::FNABS?
  default:
    return SDValue();
  }

  // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
  // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
  SDValue LogicOp0 = N0.getOperand(0);
  ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
      LogicOp0.getOpcode() == ISD::BITCAST &&
      LogicOp0->getOperand(0).getValueType() == VT)
    return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));

  return SDValue();
}

SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize, since afterward the target may be depending
  // on the bitconvert.
  // First check to see if this is all constant.
  if (!LegalTypes &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      VT.isVector()) {
    bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();

    EVT DestEltVT = N->getValueType(0).getVectorElementType();
    assert(!DestEltVT.isVector() &&
           "Element type of vector ValueType must not be vector!");
    if (isSimple)
      return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
  }

   9755   // If the input is a constant, let getNode fold it.
    9756   // We always need to check that this is just an fp -> int or int -> fp
    9757   // conversion; otherwise we will get back N, which will confuse the caller
    9758   // into thinking we used CombineTo. That can block target combines from
    9759   // running. If only legal operations are allowed, we also need to ensure
    9760   // that the resulting operation will be legal.
   9761   // TODO: Maybe we should check that the return value isn't N explicitly?
   9762   if ((isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
   9763        (!LegalOperations || TLI.isOperationLegal(ISD::ConstantFP, VT))) ||
   9764       (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
   9765        (!LegalOperations || TLI.isOperationLegal(ISD::Constant, VT))))
   9766     return DAG.getBitcast(VT, N0);
   9767 
   9768   // (conv (conv x, t1), t2) -> (conv x, t2)
   9769   if (N0.getOpcode() == ISD::BITCAST)
   9770     return DAG.getBitcast(VT, N0.getOperand(0));
   9771 
   9772   // fold (conv (load x)) -> (load (conv*)x)
   9773   // If the resultant load doesn't need a higher alignment than the original!
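           // For example, an f64 load feeding only an i64 bitcast can be rewritten as
           // an i64 load of the same address, provided the target reports the i64
           // access at the original alignment as both allowed and fast.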
   9774   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
   9775       // Do not change the width of a volatile load.
   9776       !cast<LoadSDNode>(N0)->isVolatile() &&
   9777       // Do not remove the cast if the types differ in endian layout.
   9778       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
   9779           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
   9780       (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
   9781       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
   9782     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   9783     unsigned OrigAlign = LN0->getAlignment();
   9784 
   9785     bool Fast = false;
   9786     if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
   9787                                LN0->getAddressSpace(), OrigAlign, &Fast) &&
   9788         Fast) {
   9789       SDValue Load =
   9790           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
   9791                       LN0->getPointerInfo(), OrigAlign,
   9792                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
   9793       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
   9794       return Load;
   9795     }
   9796   }
   9797 
   9798   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
   9799     return V;
   9800 
   9801   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
   9802   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
   9803   //
   9804   // For ppc_fp128:
   9805   // fold (bitcast (fneg x)) ->
   9806   //     flipbit = signbit
   9807   //     (xor (bitcast x) (build_pair flipbit, flipbit))
   9808   //
   9809   // fold (bitcast (fabs x)) ->
   9810   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
   9811   //     (xor (bitcast x) (build_pair flipbit, flipbit))
   9812   // This often reduces constant pool loads.
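           // For example, (i64 (bitcast (fneg f64:X))) becomes
           // (xor (i64 (bitcast X)), 0x8000000000000000), and the fabs form uses
           // (and ..., 0x7fffffffffffffff) instead.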
   9813   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
   9814        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
   9815       N0.getNode()->hasOneUse() && VT.isInteger() &&
   9816       !VT.isVector() && !N0.getValueType().isVector()) {
   9817     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
   9818     AddToWorklist(NewConv.getNode());
   9819 
   9820     SDLoc DL(N);
   9821     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
   9822       assert(VT.getSizeInBits() == 128);
   9823       SDValue SignBit = DAG.getConstant(
   9824           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
   9825       SDValue FlipBit;
   9826       if (N0.getOpcode() == ISD::FNEG) {
   9827         FlipBit = SignBit;
   9828         AddToWorklist(FlipBit.getNode());
   9829       } else {
   9830         assert(N0.getOpcode() == ISD::FABS);
   9831         SDValue Hi =
   9832             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
   9833                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
   9834                                               SDLoc(NewConv)));
   9835         AddToWorklist(Hi.getNode());
   9836         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
   9837         AddToWorklist(FlipBit.getNode());
   9838       }
   9839       SDValue FlipBits =
   9840           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
   9841       AddToWorklist(FlipBits.getNode());
   9842       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
   9843     }
   9844     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
   9845     if (N0.getOpcode() == ISD::FNEG)
   9846       return DAG.getNode(ISD::XOR, DL, VT,
   9847                          NewConv, DAG.getConstant(SignBit, DL, VT));
   9848     assert(N0.getOpcode() == ISD::FABS);
   9849     return DAG.getNode(ISD::AND, DL, VT,
   9850                        NewConv, DAG.getConstant(~SignBit, DL, VT));
   9851   }
   9852 
   9853   // fold (bitconvert (fcopysign cst, x)) ->
   9854   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
   9855   // Note that we don't handle (copysign x, cst) because this can always be
   9856   // folded to an fneg or fabs.
   9857   //
   9858   // For ppc_fp128:
   9859   // fold (bitcast (fcopysign cst, x)) ->
   9860   //     flipbit = (and (extract_element
   9861   //                     (xor (bitcast cst), (bitcast x)), 0),
   9862   //                    signbit)
   9863   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
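           // Worked example for the non-ppc_fp128 path with f64: the bits of 1.0 are
           // 0x3ff0000000000000 (sign bit clear), so (bitcast (fcopysign 1.0, X))
           // becomes (or (and (bitcast X), 0x8000000000000000), 0x3ff0000000000000).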
   9864   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
   9865       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
   9866       VT.isInteger() && !VT.isVector()) {
   9867     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
   9868     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
   9869     if (isTypeLegal(IntXVT)) {
   9870       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
   9871       AddToWorklist(X.getNode());
   9872 
   9873       // If X has a different width than the result/lhs, sext it or truncate it.
   9874       unsigned VTWidth = VT.getSizeInBits();
   9875       if (OrigXWidth < VTWidth) {
   9876         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
   9877         AddToWorklist(X.getNode());
   9878       } else if (OrigXWidth > VTWidth) {
   9879         // To get the sign bit in the right place, we have to shift it right
   9880         // before truncating.
   9881         SDLoc DL(X);
   9882         X = DAG.getNode(ISD::SRL, DL,
   9883                         X.getValueType(), X,
   9884                         DAG.getConstant(OrigXWidth-VTWidth, DL,
   9885                                         X.getValueType()));
   9886         AddToWorklist(X.getNode());
   9887         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
   9888         AddToWorklist(X.getNode());
   9889       }
   9890 
   9891       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
   9892         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
   9893         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
   9894         AddToWorklist(Cst.getNode());
   9895         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
   9896         AddToWorklist(X.getNode());
   9897         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
   9898         AddToWorklist(XorResult.getNode());
   9899         SDValue XorResult64 = DAG.getNode(
   9900             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
   9901             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
   9902                                   SDLoc(XorResult)));
   9903         AddToWorklist(XorResult64.getNode());
   9904         SDValue FlipBit =
   9905             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
   9906                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
   9907         AddToWorklist(FlipBit.getNode());
   9908         SDValue FlipBits =
   9909             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
   9910         AddToWorklist(FlipBits.getNode());
   9911         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
   9912       }
   9913       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
   9914       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
   9915                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
   9916       AddToWorklist(X.getNode());
   9917 
   9918       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
   9919       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
   9920                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
   9921       AddToWorklist(Cst.getNode());
   9922 
   9923       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
   9924     }
   9925   }
   9926 
   9927   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
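           // For example, an i64 built as a pair of two consecutive i32 loads can be
           // replaced by a single i64 load when the wider type does not require
           // stricter alignment than the original loads provide.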
   9928   if (N0.getOpcode() == ISD::BUILD_PAIR)
   9929     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
   9930       return CombineLD;
   9931 
   9932   // Remove double bitcasts from shuffles - this is often a legacy of
   9933   // XformToShuffleWithZero being used to combine bitmaskings (of
   9934   // float vectors bitcast to integer vectors) into shuffles.
   9935   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
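           // For example, (v4i32 (bitcast (shuffle<0,3> (v2i64 (bitcast s0)),
           // (v2i64 (bitcast s1))))) becomes (shuffle<0,1,6,7> s0, s1): each mask
           // entry is widened by MaskScale == 2.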
   9936   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
   9937       N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
   9938       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
   9939       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
   9940     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
   9941 
   9942     // If operands are a bitcast, peek through if it casts the original VT.
   9943     // If operands are a constant, just bitcast back to original VT.
   9944     auto PeekThroughBitcast = [&](SDValue Op) {
   9945       if (Op.getOpcode() == ISD::BITCAST &&
   9946           Op.getOperand(0).getValueType() == VT)
   9947         return SDValue(Op.getOperand(0));
   9948       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
   9949           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
   9950         return DAG.getBitcast(VT, Op);
   9951       return SDValue();
   9952     };
   9953 
   9954     // FIXME: If either input vector is bitcast, try to convert the shuffle to
   9955     // the result type of this bitcast. This would eliminate at least one
   9956     // bitcast. See the transform in InstCombine.
   9957     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
   9958     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
   9959     if (!(SV0 && SV1))
   9960       return SDValue();
   9961 
   9962     int MaskScale =
   9963         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
   9964     SmallVector<int, 8> NewMask;
   9965     for (int M : SVN->getMask())
   9966       for (int i = 0; i != MaskScale; ++i)
   9967         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
   9968 
   9969     bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
   9970     if (!LegalMask) {
   9971       std::swap(SV0, SV1);
   9972       ShuffleVectorSDNode::commuteMask(NewMask);
   9973       LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
   9974     }
   9975 
   9976     if (LegalMask)
   9977       return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
   9978   }
   9979 
   9980   return SDValue();
   9981 }
   9982 
   9983 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
   9984   EVT VT = N->getValueType(0);
   9985   return CombineConsecutiveLoads(N, VT);
   9986 }
   9987 
   9988 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
   9989 /// operands. DstEltVT indicates the destination element value type.
   9990 SDValue DAGCombiner::
   9991 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
   9992   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
   9993 
   9994   // If this is already the right type, we're done.
   9995   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
   9996 
   9997   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
   9998   unsigned DstBitSize = DstEltVT.getSizeInBits();
   9999 
   10000   // If this is a conversion of N elements of one type to N elements of another
   10001   // type, convert each element.  This handles FP<->INT cases.
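            // For example, a constant v4f32 build_vector bitcast to v4i32 just
            // bitcasts each f32 element to the equivalent i32 constant.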
   10002   if (SrcBitSize == DstBitSize) {
   10003     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
   10004                               BV->getValueType(0).getVectorNumElements());
   10005 
   10006     // Due to the FP element handling below calling this routine recursively,
   10007     // we can end up with a scalar-to-vector node here.
   10008     if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
   10009       return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
   10010                          DAG.getBitcast(DstEltVT, BV->getOperand(0)));
   10011 
   10012     SmallVector<SDValue, 8> Ops;
   10013     for (SDValue Op : BV->op_values()) {
   10014       // If the vector element type is not legal, the BUILD_VECTOR operands
   10015       // are promoted and implicitly truncated.  Make that explicit here.
   10016       if (Op.getValueType() != SrcEltVT)
   10017         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
   10018       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
   10019       AddToWorklist(Ops.back().getNode());
   10020     }
   10021     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
   10022   }
   10023 
   10024   // Otherwise, we're growing or shrinking the elements.  To avoid having to
   10025   // handle annoying details of growing/shrinking FP values, we convert them to
   10026   // int first.
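            // For example, v2f64 -> v4i32 is handled as v2f64 -> v2i64 (same-size
            // per-element bitcast) followed by v2i64 -> v4i32 (integer shrinking).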
   10027   if (SrcEltVT.isFloatingPoint()) {
    10028     // Convert the input float vector to an int vector whose elements are
    10029     // the same size.
   10030     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
   10031     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
   10032     SrcEltVT = IntVT;
   10033   }
   10034 
   10035   // Now we know the input is an integer vector.  If the output is a FP type,
   10036   // convert to integer first, then to FP of the right size.
   10037   if (DstEltVT.isFloatingPoint()) {
   10038     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
   10039     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
   10040 
   10041     // Next, convert to FP elements of the same size.
   10042     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
   10043   }
   10044 
   10045   SDLoc DL(BV);
   10046 
    10047   // Okay, we know the src/dst element types are both integers of differing
    10048   // sizes. Handle the growing case first.
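            // For example, v4i32 -> v2i64 on a little-endian target combines elements
            // pairwise as (zext(Elt[i+1]) << 32) | zext(Elt[i]), i.e. the
            // higher-indexed input ends up in the high half of each output element.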
   10049   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
   10050   if (SrcBitSize < DstBitSize) {
   10051     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
   10052 
   10053     SmallVector<SDValue, 8> Ops;
   10054     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
   10055          i += NumInputsPerOutput) {
   10056       bool isLE = DAG.getDataLayout().isLittleEndian();
   10057       APInt NewBits = APInt(DstBitSize, 0);
   10058       bool EltIsUndef = true;
   10059       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
   10060         // Shift the previously computed bits over.
   10061         NewBits <<= SrcBitSize;
    10062         SDValue Op = BV->getOperand(i + (isLE ? (NumInputsPerOutput-j-1) : j));
   10063         if (Op.isUndef()) continue;
   10064         EltIsUndef = false;
   10065 
   10066         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
   10067                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
   10068       }
   10069 
   10070       if (EltIsUndef)
   10071         Ops.push_back(DAG.getUNDEF(DstEltVT));
   10072       else
   10073         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
   10074     }
   10075 
   10076     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
   10077     return DAG.getBuildVector(VT, DL, Ops);
   10078   }
   10079 
   10080   // Finally, this must be the case where we are shrinking elements: each input
   10081   // turns into multiple outputs.
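            // For example, v2i64 -> v4i32 splits each i64 into two i32 pieces, low
            // piece first; the pieces of each element are then reversed below on
            // big-endian targets.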
   10082   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
   10083   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
   10084                             NumOutputsPerInput*BV->getNumOperands());
   10085   SmallVector<SDValue, 8> Ops;
   10086 
   10087   for (const SDValue &Op : BV->op_values()) {
   10088     if (Op.isUndef()) {
   10089       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
   10090       continue;
   10091     }
   10092 
   10093     APInt OpVal = cast<ConstantSDNode>(Op)->
   10094                   getAPIntValue().zextOrTrunc(SrcBitSize);
   10095 
   10096     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
   10097       APInt ThisVal = OpVal.trunc(DstBitSize);
   10098       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
   10099       OpVal.lshrInPlace(DstBitSize);
   10100     }
   10101 
   10102     // For big endian targets, swap the order of the pieces of each element.
   10103     if (DAG.getDataLayout().isBigEndian())
   10104       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
   10105   }
   10106 
   10107   return DAG.getBuildVector(VT, DL, Ops);
   10108 }
   10109 
   10110 static bool isContractable(SDNode *N) {
   10111   SDNodeFlags F = N->getFlags();
   10112   return F.hasAllowContract() || F.hasAllowReassociation();
   10113 }
   10114 
   10115 /// Try to perform FMA combining on a given FADD node.
   10116 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
   10117   SDValue N0 = N->getOperand(0);
   10118   SDValue N1 = N->getOperand(1);
   10119   EVT VT = N->getValueType(0);
   10120   SDLoc SL(N);
   10121 
   10122   const TargetOptions &Options = DAG.getTarget().Options;
   10123 
   10124   // Floating-point multiply-add with intermediate rounding.
   10125   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
   10126 
   10127   // Floating-point multiply-add without intermediate rounding.
   10128   bool HasFMA =
   10129       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
   10130       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
   10131 
   10132   // No valid opcode, do not combine.
   10133   if (!HasFMAD && !HasFMA)
   10134     return SDValue();
   10135 
   10136   SDNodeFlags Flags = N->getFlags();
   10137   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
   10138   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
   10139                               CanFuse || HasFMAD);
   10140   // If the addition is not contractable, do not combine.
   10141   if (!AllowFusionGlobally && !isContractable(N))
   10142     return SDValue();
   10143 
   10144   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
   10145   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
   10146     return SDValue();
   10147 
   10148   // Always prefer FMAD to FMA for precision.
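            // (FMAD keeps the intermediate rounding of the separate multiply and add,
            // so it reproduces the unfused result exactly, whereas FMA does not round
            // the intermediate product.)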
   10149   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
   10150   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
   10151 
    10152   // Returns true if the node is an FMUL that is contractable, either due to
    10153   // global flags or its SDNodeFlags.
   10154   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
   10155     if (N.getOpcode() != ISD::FMUL)
   10156       return false;
   10157     return AllowFusionGlobally || isContractable(N.getNode());
   10158   };
   10159   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
   10160   // prefer to fold the multiply with fewer uses.
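            // (The folded multiply only disappears when the new fused node was its
            // sole user, so the one with fewer uses is the better candidate.)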
   10161   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
   10162     if (N0.getNode()->use_size() > N1.getNode()->use_size())
   10163       std::swap(N0, N1);
   10164   }
   10165 
   10166   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
   10167   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
   10168     return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10169                        N0.getOperand(0), N0.getOperand(1), N1, Flags);
   10170   }
   10171 
   10172   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
   10173   // Note: Commutes FADD operands.
   10174   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
   10175     return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10176                        N1.getOperand(0), N1.getOperand(1), N0, Flags);
   10177   }
   10178 
   10179   // Look through FP_EXTEND nodes to do more combining.
   10180 
   10181   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
   10182   if (N0.getOpcode() == ISD::FP_EXTEND) {
   10183     SDValue N00 = N0.getOperand(0);
   10184     if (isContractableFMUL(N00) &&
   10185         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
   10186       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10187                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10188                                      N00.getOperand(0)),
   10189                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10190                                      N00.getOperand(1)), N1, Flags);
   10191     }
   10192   }
   10193 
   10194   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
   10195   // Note: Commutes FADD operands.
   10196   if (N1.getOpcode() == ISD::FP_EXTEND) {
   10197     SDValue N10 = N1.getOperand(0);
   10198     if (isContractableFMUL(N10) &&
   10199         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
   10200       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10201                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10202                                      N10.getOperand(0)),
   10203                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10204                                      N10.getOperand(1)), N0, Flags);
   10205     }
   10206   }
   10207 
    10208   // More folding opportunities when the target permits.
   10209   if (Aggressive) {
    10210     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
   10211     if (CanFuse &&
   10212         N0.getOpcode() == PreferredFusedOpcode &&
   10213         N0.getOperand(2).getOpcode() == ISD::FMUL &&
   10214         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
   10215       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10216                          N0.getOperand(0), N0.getOperand(1),
   10217                          DAG.getNode(PreferredFusedOpcode, SL, VT,
   10218                                      N0.getOperand(2).getOperand(0),
   10219                                      N0.getOperand(2).getOperand(1),
   10220                                      N1, Flags), Flags);
   10221     }
   10222 
    10223     // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
   10224     if (CanFuse &&
   10225         N1->getOpcode() == PreferredFusedOpcode &&
   10226         N1.getOperand(2).getOpcode() == ISD::FMUL &&
   10227         N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
   10228       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10229                          N1.getOperand(0), N1.getOperand(1),
   10230                          DAG.getNode(PreferredFusedOpcode, SL, VT,
   10231                                      N1.getOperand(2).getOperand(0),
   10232                                      N1.getOperand(2).getOperand(1),
   10233                                      N0, Flags), Flags);
   10234     }
   10235 
   10236 
   10237     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
   10238     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
   10239     auto FoldFAddFMAFPExtFMul = [&] (
   10240       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
   10241       SDNodeFlags Flags) {
   10242       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
   10243                          DAG.getNode(PreferredFusedOpcode, SL, VT,
   10244                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
   10245                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
   10246                                      Z, Flags), Flags);
   10247     };
   10248     if (N0.getOpcode() == PreferredFusedOpcode) {
   10249       SDValue N02 = N0.getOperand(2);
   10250       if (N02.getOpcode() == ISD::FP_EXTEND) {
   10251         SDValue N020 = N02.getOperand(0);
   10252         if (isContractableFMUL(N020) &&
   10253             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
   10254           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
   10255                                       N020.getOperand(0), N020.getOperand(1),
   10256                                       N1, Flags);
   10257         }
   10258       }
   10259     }
   10260 
   10261     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
   10262     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
   10263     // FIXME: This turns two single-precision and one double-precision
   10264     // operation into two double-precision operations, which might not be
   10265     // interesting for all targets, especially GPUs.
   10266     auto FoldFAddFPExtFMAFMul = [&] (
   10267       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
   10268       SDNodeFlags Flags) {
   10269       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10270                          DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
   10271                          DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
   10272                          DAG.getNode(PreferredFusedOpcode, SL, VT,
   10273                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
   10274                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
   10275                                      Z, Flags), Flags);
   10276     };
   10277     if (N0.getOpcode() == ISD::FP_EXTEND) {
   10278       SDValue N00 = N0.getOperand(0);
   10279       if (N00.getOpcode() == PreferredFusedOpcode) {
   10280         SDValue N002 = N00.getOperand(2);
   10281         if (isContractableFMUL(N002) &&
   10282             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
   10283           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
   10284                                       N002.getOperand(0), N002.getOperand(1),
   10285                                       N1, Flags);
   10286         }
   10287       }
   10288     }
   10289 
    10290     // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
   10291     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
   10292     if (N1.getOpcode() == PreferredFusedOpcode) {
   10293       SDValue N12 = N1.getOperand(2);
   10294       if (N12.getOpcode() == ISD::FP_EXTEND) {
   10295         SDValue N120 = N12.getOperand(0);
   10296         if (isContractableFMUL(N120) &&
   10297             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
   10298           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
   10299                                       N120.getOperand(0), N120.getOperand(1),
   10300                                       N0, Flags);
   10301         }
   10302       }
   10303     }
   10304 
    10305     // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
   10306     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
   10307     // FIXME: This turns two single-precision and one double-precision
   10308     // operation into two double-precision operations, which might not be
   10309     // interesting for all targets, especially GPUs.
   10310     if (N1.getOpcode() == ISD::FP_EXTEND) {
   10311       SDValue N10 = N1.getOperand(0);
   10312       if (N10.getOpcode() == PreferredFusedOpcode) {
   10313         SDValue N102 = N10.getOperand(2);
   10314         if (isContractableFMUL(N102) &&
   10315             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
   10316           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
   10317                                       N102.getOperand(0), N102.getOperand(1),
   10318                                       N0, Flags);
   10319         }
   10320       }
   10321     }
   10322   }
   10323 
   10324   return SDValue();
   10325 }
   10326 
   10327 /// Try to perform FMA combining on a given FSUB node.
   10328 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
   10329   SDValue N0 = N->getOperand(0);
   10330   SDValue N1 = N->getOperand(1);
   10331   EVT VT = N->getValueType(0);
   10332   SDLoc SL(N);
   10333 
   10334   const TargetOptions &Options = DAG.getTarget().Options;
   10335   // Floating-point multiply-add with intermediate rounding.
   10336   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
   10337 
   10338   // Floating-point multiply-add without intermediate rounding.
   10339   bool HasFMA =
   10340       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
   10341       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
   10342 
   10343   // No valid opcode, do not combine.
   10344   if (!HasFMAD && !HasFMA)
   10345     return SDValue();
   10346 
   10347   const SDNodeFlags Flags = N->getFlags();
   10348   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
   10349   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
   10350                               CanFuse || HasFMAD);
   10351 
   10352   // If the subtraction is not contractable, do not combine.
   10353   if (!AllowFusionGlobally && !isContractable(N))
   10354     return SDValue();
   10355 
   10356   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
   10357   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
   10358     return SDValue();
   10359 
   10360   // Always prefer FMAD to FMA for precision.
   10361   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
   10362   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
   10363 
    10364   // Returns true if the node is an FMUL that is contractable, either due to
    10365   // global flags or its SDNodeFlags.
   10366   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
   10367     if (N.getOpcode() != ISD::FMUL)
   10368       return false;
   10369     return AllowFusionGlobally || isContractable(N.getNode());
   10370   };
   10371 
   10372   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
   10373   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
   10374     return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10375                        N0.getOperand(0), N0.getOperand(1),
   10376                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
   10377   }
   10378 
   10379   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
   10380   // Note: Commutes FSUB operands.
   10381   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
   10382     return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10383                        DAG.getNode(ISD::FNEG, SL, VT,
   10384                                    N1.getOperand(0)),
   10385                        N1.getOperand(1), N0, Flags);
   10386   }
   10387 
    10388   // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
   10389   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
   10390       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
   10391     SDValue N00 = N0.getOperand(0).getOperand(0);
   10392     SDValue N01 = N0.getOperand(0).getOperand(1);
   10393     return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10394                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
   10395                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
   10396   }
   10397 
   10398   // Look through FP_EXTEND nodes to do more combining.
   10399 
   10400   // fold (fsub (fpext (fmul x, y)), z)
   10401   //   -> (fma (fpext x), (fpext y), (fneg z))
   10402   if (N0.getOpcode() == ISD::FP_EXTEND) {
   10403     SDValue N00 = N0.getOperand(0);
   10404     if (isContractableFMUL(N00) &&
   10405         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
   10406       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10407                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10408                                      N00.getOperand(0)),
   10409                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10410                                      N00.getOperand(1)),
   10411                          DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
   10412     }
   10413   }
   10414 
   10415   // fold (fsub x, (fpext (fmul y, z)))
   10416   //   -> (fma (fneg (fpext y)), (fpext z), x)
   10417   // Note: Commutes FSUB operands.
   10418   if (N1.getOpcode() == ISD::FP_EXTEND) {
   10419     SDValue N10 = N1.getOperand(0);
   10420     if (isContractableFMUL(N10) &&
   10421         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
   10422       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10423                          DAG.getNode(ISD::FNEG, SL, VT,
   10424                                      DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10425                                                  N10.getOperand(0))),
   10426                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10427                                      N10.getOperand(1)),
   10428                          N0, Flags);
   10429     }
   10430   }
   10431 
    10432   // fold (fsub (fpext (fneg (fmul x, y))), z)
    10433   //   -> (fneg (fma (fpext x), (fpext y), z))
    10434   // Note: This could be removed with appropriate canonicalization of the
    10435   // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
    10436   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
    10437   // us from implementing the canonicalization in visitFSUB.
   10438   if (N0.getOpcode() == ISD::FP_EXTEND) {
   10439     SDValue N00 = N0.getOperand(0);
   10440     if (N00.getOpcode() == ISD::FNEG) {
   10441       SDValue N000 = N00.getOperand(0);
   10442       if (isContractableFMUL(N000) &&
   10443           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
   10444         return DAG.getNode(ISD::FNEG, SL, VT,
   10445                            DAG.getNode(PreferredFusedOpcode, SL, VT,
   10446                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10447                                                    N000.getOperand(0)),
   10448                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10449                                                    N000.getOperand(1)),
   10450                                        N1, Flags));
   10451       }
   10452     }
   10453   }
   10454 
    10455   // fold (fsub (fneg (fpext (fmul x, y))), z)
    10456   //   -> (fneg (fma (fpext x), (fpext y), z))
    10457   // Note: This could be removed with appropriate canonicalization of the
    10458   // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
    10459   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
    10460   // us from implementing the canonicalization in visitFSUB.
   10461   if (N0.getOpcode() == ISD::FNEG) {
   10462     SDValue N00 = N0.getOperand(0);
   10463     if (N00.getOpcode() == ISD::FP_EXTEND) {
   10464       SDValue N000 = N00.getOperand(0);
   10465       if (isContractableFMUL(N000) &&
   10466           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
   10467         return DAG.getNode(ISD::FNEG, SL, VT,
   10468                            DAG.getNode(PreferredFusedOpcode, SL, VT,
   10469                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10470                                                    N000.getOperand(0)),
   10471                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10472                                                    N000.getOperand(1)),
   10473                                        N1, Flags));
   10474       }
   10475     }
   10476   }
   10477 
    10478   // More folding opportunities when the target permits.
   10479   if (Aggressive) {
   10480     // fold (fsub (fma x, y, (fmul u, v)), z)
    10481     //   -> (fma x, y, (fma u, v, (fneg z)))
   10482     if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
   10483         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
   10484         N0.getOperand(2)->hasOneUse()) {
   10485       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10486                          N0.getOperand(0), N0.getOperand(1),
   10487                          DAG.getNode(PreferredFusedOpcode, SL, VT,
   10488                                      N0.getOperand(2).getOperand(0),
   10489                                      N0.getOperand(2).getOperand(1),
   10490                                      DAG.getNode(ISD::FNEG, SL, VT,
   10491                                                  N1), Flags), Flags);
   10492     }
   10493 
   10494     // fold (fsub x, (fma y, z, (fmul u, v)))
   10495     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
   10496     if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
   10497         isContractableFMUL(N1.getOperand(2))) {
   10498       SDValue N20 = N1.getOperand(2).getOperand(0);
   10499       SDValue N21 = N1.getOperand(2).getOperand(1);
   10500       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10501                          DAG.getNode(ISD::FNEG, SL, VT,
   10502                                      N1.getOperand(0)),
   10503                          N1.getOperand(1),
   10504                          DAG.getNode(PreferredFusedOpcode, SL, VT,
   10505                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
   10506                                      N21, N0, Flags), Flags);
   10507     }
   10508 
   10509 
   10510     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    10511     //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
   10512     if (N0.getOpcode() == PreferredFusedOpcode) {
   10513       SDValue N02 = N0.getOperand(2);
   10514       if (N02.getOpcode() == ISD::FP_EXTEND) {
   10515         SDValue N020 = N02.getOperand(0);
   10516         if (isContractableFMUL(N020) &&
   10517             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
   10518           return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10519                              N0.getOperand(0), N0.getOperand(1),
   10520                              DAG.getNode(PreferredFusedOpcode, SL, VT,
   10521                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10522                                                      N020.getOperand(0)),
   10523                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10524                                                      N020.getOperand(1)),
   10525                                          DAG.getNode(ISD::FNEG, SL, VT,
   10526                                                      N1), Flags), Flags);
   10527         }
   10528       }
   10529     }
   10530 
   10531     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
   10532     //   -> (fma (fpext x), (fpext y),
   10533     //           (fma (fpext u), (fpext v), (fneg z)))
   10534     // FIXME: This turns two single-precision and one double-precision
   10535     // operation into two double-precision operations, which might not be
   10536     // interesting for all targets, especially GPUs.
   10537     if (N0.getOpcode() == ISD::FP_EXTEND) {
   10538       SDValue N00 = N0.getOperand(0);
   10539       if (N00.getOpcode() == PreferredFusedOpcode) {
   10540         SDValue N002 = N00.getOperand(2);
   10541         if (isContractableFMUL(N002) &&
   10542             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
   10543           return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10544                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10545                                          N00.getOperand(0)),
   10546                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10547                                          N00.getOperand(1)),
   10548                              DAG.getNode(PreferredFusedOpcode, SL, VT,
   10549                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10550                                                      N002.getOperand(0)),
   10551                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10552                                                      N002.getOperand(1)),
   10553                                          DAG.getNode(ISD::FNEG, SL, VT,
   10554                                                      N1), Flags), Flags);
   10555         }
   10556       }
   10557     }
   10558 
   10559     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
   10560     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
   10561     if (N1.getOpcode() == PreferredFusedOpcode &&
   10562         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
   10563       SDValue N120 = N1.getOperand(2).getOperand(0);
   10564       if (isContractableFMUL(N120) &&
   10565           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
   10566         SDValue N1200 = N120.getOperand(0);
   10567         SDValue N1201 = N120.getOperand(1);
   10568         return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10569                            DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
   10570                            N1.getOperand(1),
   10571                            DAG.getNode(PreferredFusedOpcode, SL, VT,
   10572                                        DAG.getNode(ISD::FNEG, SL, VT,
   10573                                                    DAG.getNode(ISD::FP_EXTEND, SL,
   10574                                                                VT, N1200)),
   10575                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10576                                                    N1201),
   10577                                        N0, Flags), Flags);
   10578       }
   10579     }
   10580 
   10581     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
   10582     //   -> (fma (fneg (fpext y)), (fpext z),
   10583     //           (fma (fneg (fpext u)), (fpext v), x))
   10584     // FIXME: This turns two single-precision and one double-precision
   10585     // operation into two double-precision operations, which might not be
   10586     // interesting for all targets, especially GPUs.
   10587     if (N1.getOpcode() == ISD::FP_EXTEND &&
   10588         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
   10589       SDValue CvtSrc = N1.getOperand(0);
   10590       SDValue N100 = CvtSrc.getOperand(0);
   10591       SDValue N101 = CvtSrc.getOperand(1);
   10592       SDValue N102 = CvtSrc.getOperand(2);
   10593       if (isContractableFMUL(N102) &&
   10594           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
   10595         SDValue N1020 = N102.getOperand(0);
   10596         SDValue N1021 = N102.getOperand(1);
   10597         return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10598                            DAG.getNode(ISD::FNEG, SL, VT,
   10599                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10600                                                    N100)),
   10601                            DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
   10602                            DAG.getNode(PreferredFusedOpcode, SL, VT,
   10603                                        DAG.getNode(ISD::FNEG, SL, VT,
   10604                                                    DAG.getNode(ISD::FP_EXTEND, SL,
   10605                                                                VT, N1020)),
   10606                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
   10607                                                    N1021),
   10608                                        N0, Flags), Flags);
   10609       }
   10610     }
   10611   }
   10612 
   10613   return SDValue();
   10614 }
   10615 
   10616 /// Try to perform FMA combining on a given FMUL node based on the distributive
   10617 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
   10618 /// subtraction instead of addition).
   10619 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
   10620   SDValue N0 = N->getOperand(0);
   10621   SDValue N1 = N->getOperand(1);
   10622   EVT VT = N->getValueType(0);
   10623   SDLoc SL(N);
   10624   const SDNodeFlags Flags = N->getFlags();
   10625 
   10626   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
   10627 
   10628   const TargetOptions &Options = DAG.getTarget().Options;
   10629 
   10630   // The transforms below are incorrect when x == 0 and y == inf, because the
   10631   // intermediate multiplication produces a nan.
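            // For example, (fmul (fsub 1.0, x), y) with x == 0.0 and y == inf yields
            // 1.0 * inf == inf, but the fused (fma (fneg x), y, y) computes
            // -0.0 * inf + inf == nan + inf == nan.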
   10632   if (!Options.NoInfsFPMath)
   10633     return SDValue();
   10634 
   10635   // Floating-point multiply-add without intermediate rounding.
   10636   bool HasFMA =
   10637       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
   10638       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
   10639       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
   10640 
   10641   // Floating-point multiply-add with intermediate rounding. This can result
   10642   // in a less precise result due to the changed rounding order.
   10643   bool HasFMAD = Options.UnsafeFPMath &&
   10644                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
   10645 
   10646   // No valid opcode, do not combine.
   10647   if (!HasFMAD && !HasFMA)
   10648     return SDValue();
   10649 
   10650   // Always prefer FMAD to FMA for precision.
   10651   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
   10652   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
   10653 
   10654   // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
   10655   // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
   10656   auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
   10657     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
   10658       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
   10659       if (XC1 && XC1->isExactlyValue(+1.0))
   10660         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
   10661                            Y, Flags);
   10662       if (XC1 && XC1->isExactlyValue(-1.0))
   10663         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
   10664                            DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
   10665     }
   10666     return SDValue();
   10667   };
   10668 
   10669   if (SDValue FMA = FuseFADD(N0, N1, Flags))
   10670     return FMA;
   10671   if (SDValue FMA = FuseFADD(N1, N0, Flags))
   10672     return FMA;
   10673 
   10674   // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
   10675   // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
   10676   // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
   10677   // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
   10678   auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
   10679     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
   10680       auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
   10681       if (XC0 && XC0->isExactlyValue(+1.0))
   10682         return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10683                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
   10684                            Y, Flags);
   10685       if (XC0 && XC0->isExactlyValue(-1.0))
   10686         return DAG.getNode(PreferredFusedOpcode, SL, VT,
   10687                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
   10688                            DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
   10689 
   10690       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
   10691       if (XC1 && XC1->isExactlyValue(+1.0))
   10692         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
   10693                            DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
   10694       if (XC1 && XC1->isExactlyValue(-1.0))
   10695         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
   10696                            Y, Flags);
   10697     }
   10698     return SDValue();
   10699   };
   10700 
   10701   if (SDValue FMA = FuseFSUB(N0, N1, Flags))
   10702     return FMA;
   10703   if (SDValue FMA = FuseFSUB(N1, N0, Flags))
   10704     return FMA;
   10705 
   10706   return SDValue();
   10707 }
   10708 
   10709 static bool isFMulNegTwo(SDValue &N) {
   10710   if (N.getOpcode() != ISD::FMUL)
   10711     return false;
   10712   if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1)))
   10713     return CFP->isExactlyValue(-2.0);
   10714   return false;
   10715 }
   10716 
   10717 SDValue DAGCombiner::visitFADD(SDNode *N) {
   10718   SDValue N0 = N->getOperand(0);
   10719   SDValue N1 = N->getOperand(1);
   10720   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
   10721   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
   10722   EVT VT = N->getValueType(0);
   10723   SDLoc DL(N);
   10724   const TargetOptions &Options = DAG.getTarget().Options;
   10725   const SDNodeFlags Flags = N->getFlags();
   10726 
   10727   // fold vector ops
   10728   if (VT.isVector())
   10729     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   10730       return FoldedVOp;
   10731 
   10732   // fold (fadd c1, c2) -> c1 + c2
   10733   if (N0CFP && N1CFP)
   10734     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
   10735 
   10736   // canonicalize constant to RHS
   10737   if (N0CFP && !N1CFP)
   10738     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
   10739 
   10740   if (SDValue NewSel = foldBinOpIntoSelect(N))
   10741     return NewSel;
   10742 
   10743   // fold (fadd A, (fneg B)) -> (fsub A, B)
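            // (Requiring isNegatibleForFree == 2 means the negated form of N1 is
            // strictly cheaper than N1 itself, e.g. it just strips an existing fneg.)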
   10744   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
   10745       isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
   10746     return DAG.getNode(ISD::FSUB, DL, VT, N0,
   10747                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
   10748 
   10749   // fold (fadd (fneg A), B) -> (fsub B, A)
   10750   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
   10751       isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
   10752     return DAG.getNode(ISD::FSUB, DL, VT, N1,
   10753                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
   10754 
   10755   // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
   10756   // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
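            // (A + B * -2.0 == A - (B + B); this trades the multiply by a constant
            // for an add of identical operands, avoiding the -2.0 materialization.)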
   10757   if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
   10758       (isFMulNegTwo(N1) && N1.hasOneUse())) {
   10759     bool N1IsFMul = isFMulNegTwo(N1);
   10760     SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
   10761     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
   10762     return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
   10763   }
   10764 
   10765   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1);
   10766   if (N1C && N1C->isZero()) {
   10767     if (N1C->isNegative() || Options.UnsafeFPMath ||
   10768         Flags.hasNoSignedZeros()) {
   10769       // fold (fadd A, 0) -> A
   10770       return N0;
   10771     }
   10772   }
   10773 
    10774   // No FP constant should be created after legalization, as the Instruction
    10775   // Selection pass has a hard time dealing with FP constants.
   10776   bool AllowNewConst = (Level < AfterLegalizeDAG);
   10777 
   10778   // If 'unsafe math' or nnan is enabled, fold lots of things.
   10779   if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
   10780     // If allowed, fold (fadd (fneg x), x) -> 0.0
   10781     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
   10782       return DAG.getConstantFP(0.0, DL, VT);
   10783 
   10784     // If allowed, fold (fadd x, (fneg x)) -> 0.0
   10785     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
   10786       return DAG.getConstantFP(0.0, DL, VT);
   10787   }
   10788 
    10789   // If 'unsafe math' is enabled, or both reassoc and nsz are set, fold lots of things.
   10790   // TODO: break out portions of the transformations below for which Unsafe is
   10791   //       considered and which do not require both nsz and reassoc
   10792   if ((Options.UnsafeFPMath ||
   10793        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
   10794       AllowNewConst) {
   10795     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
   10796     if (N1CFP && N0.getOpcode() == ISD::FADD &&
   10797         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
   10798       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
   10799       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
   10800     }
   10801 
   10802     // We can fold chains of FADD's of the same value into multiplications.
   10803     // This transform is not safe in general because we are reducing the number
   10804     // of rounding steps.
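              // For example, (x * c) + x rounds after both the multiply and the add,
              // while the folded x * (c + 1.0) rounds only once, so the results can
              // differ in the last bit.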
   10805     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
   10806       if (N0.getOpcode() == ISD::FMUL) {
   10807         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
   10808         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
   10809 
   10810         // (fadd (fmul x, c), x) -> (fmul x, c+1)
   10811         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
   10812           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
   10813                                        DAG.getConstantFP(1.0, DL, VT), Flags);
   10814           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
   10815         }
   10816 
   10817         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
   10818         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
   10819             N1.getOperand(0) == N1.getOperand(1) &&
   10820             N0.getOperand(0) == N1.getOperand(0)) {
   10821           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
   10822                                        DAG.getConstantFP(2.0, DL, VT), Flags);
   10823           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
   10824         }
   10825       }
   10826 
   10827       if (N1.getOpcode() == ISD::FMUL) {
   10828         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
   10829         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
   10830 
   10831         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
   10832         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
   10833           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
   10834                                        DAG.getConstantFP(1.0, DL, VT), Flags);
   10835           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
   10836         }
   10837 
   10838         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
   10839         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
   10840             N0.getOperand(0) == N0.getOperand(1) &&
   10841             N1.getOperand(0) == N0.getOperand(0)) {
   10842           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
   10843                                        DAG.getConstantFP(2.0, DL, VT), Flags);
   10844           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
   10845         }
   10846       }
   10847 
   10848       if (N0.getOpcode() == ISD::FADD) {
   10849         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
   10850         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
   10851         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
   10852             (N0.getOperand(0) == N1)) {
   10853           return DAG.getNode(ISD::FMUL, DL, VT,
   10854                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
   10855         }
   10856       }
   10857 
   10858       if (N1.getOpcode() == ISD::FADD) {
   10859         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
   10860         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
   10861         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
   10862             N1.getOperand(0) == N0) {
   10863           return DAG.getNode(ISD::FMUL, DL, VT,
   10864                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
   10865         }
   10866       }
   10867 
   10868       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
   10869       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
   10870           N0.getOperand(0) == N0.getOperand(1) &&
   10871           N1.getOperand(0) == N1.getOperand(1) &&
   10872           N0.getOperand(0) == N1.getOperand(0)) {
   10873         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
   10874                            DAG.getConstantFP(4.0, DL, VT), Flags);
   10875       }
   10876     }
   10877   } // enable-unsafe-fp-math
   10878 
   10879   // FADD -> FMA combines:
   10880   if (SDValue Fused = visitFADDForFMACombine(N)) {
   10881     AddToWorklist(Fused.getNode());
   10882     return Fused;
   10883   }
   10884   return SDValue();
   10885 }
   10886 
   10887 SDValue DAGCombiner::visitFSUB(SDNode *N) {
   10888   SDValue N0 = N->getOperand(0);
   10889   SDValue N1 = N->getOperand(1);
   10890   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
   10891   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
   10892   EVT VT = N->getValueType(0);
   10893   SDLoc DL(N);
   10894   const TargetOptions &Options = DAG.getTarget().Options;
   10895   const SDNodeFlags Flags = N->getFlags();
   10896 
   10897   // fold vector ops
   10898   if (VT.isVector())
   10899     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   10900       return FoldedVOp;
   10901 
   10902   // fold (fsub c1, c2) -> c1-c2
   10903   if (N0CFP && N1CFP)
   10904     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
   10905 
   10906   if (SDValue NewSel = foldBinOpIntoSelect(N))
   10907     return NewSel;
   10908 
   10909   // (fsub A, 0) -> A
   10910   if (N1CFP && N1CFP->isZero()) {
   10911     if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
   10912         Flags.hasNoSignedZeros()) {
   10913       return N0;
   10914     }
   10915   }
   10916 
   10917   if (N0 == N1) {
   10918     // (fsub x, x) -> 0.0
   10919     if (Options.UnsafeFPMath || Flags.hasNoNaNs())
   10920       return DAG.getConstantFP(0.0f, DL, VT);
   10921   }
   10922 
   10923   // (fsub 0, B) -> -B
   10924   if (N0CFP && N0CFP->isZero()) {
   10925     if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) {
   10926       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
   10927         return GetNegatedExpression(N1, DAG, LegalOperations);
   10928       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
   10929         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
   10930     }
   10931   }
   10932 
   10933   // fold (fsub A, (fneg B)) -> (fadd A, B)
   10934   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
   10935     return DAG.getNode(ISD::FADD, DL, VT, N0,
   10936                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
   10937 
   10938   // If 'unsafe math' is enabled, fold lots of things.
   10939   if (Options.UnsafeFPMath) {
   10940     // (fsub x, (fadd x, y)) -> (fneg y)
   10941     // (fsub x, (fadd y, x)) -> (fneg y)
   10942     if (N1.getOpcode() == ISD::FADD) {
   10943       SDValue N10 = N1->getOperand(0);
   10944       SDValue N11 = N1->getOperand(1);
   10945 
   10946       if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
   10947         return GetNegatedExpression(N11, DAG, LegalOperations);
   10948 
   10949       if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
   10950         return GetNegatedExpression(N10, DAG, LegalOperations);
   10951     }
   10952   }
   10953 
   10954   // FSUB -> FMA combines:
   10955   if (SDValue Fused = visitFSUBForFMACombine(N)) {
   10956     AddToWorklist(Fused.getNode());
   10957     return Fused;
   10958   }
   10959 
   10960   return SDValue();
   10961 }
   10962 
   10963 SDValue DAGCombiner::visitFMUL(SDNode *N) {
   10964   SDValue N0 = N->getOperand(0);
   10965   SDValue N1 = N->getOperand(1);
   10966   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
   10967   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
   10968   EVT VT = N->getValueType(0);
   10969   SDLoc DL(N);
   10970   const TargetOptions &Options = DAG.getTarget().Options;
   10971   const SDNodeFlags Flags = N->getFlags();
   10972 
   10973   // fold vector ops
   10974   if (VT.isVector()) {
   10975     // This just handles C1 * C2 for vectors. Other vector folds are below.
   10976     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   10977       return FoldedVOp;
   10978   }
   10979 
   10980   // fold (fmul c1, c2) -> c1*c2
   10981   if (N0CFP && N1CFP)
   10982     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
   10983 
   10984   // canonicalize constant to RHS
   10985   if (isConstantFPBuildVectorOrConstantFP(N0) &&
   10986      !isConstantFPBuildVectorOrConstantFP(N1))
   10987     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
   10988 
   10989   // fold (fmul A, 1.0) -> A
   10990   if (N1CFP && N1CFP->isExactlyValue(1.0))
   10991     return N0;
   10992 
   10993   if (SDValue NewSel = foldBinOpIntoSelect(N))
   10994     return NewSel;
   10995 
   10996   if (Options.UnsafeFPMath ||
   10997       (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
   10998     // fold (fmul A, 0) -> 0
   10999     if (N1CFP && N1CFP->isZero())
   11000       return N1;
   11001   }
   11002 
   11003   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
   11004     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
   11005     if (N0.getOpcode() == ISD::FMUL) {
   11006       // Fold scalars or any vector constants (not just splats).
   11007       // This fold is done in general by InstCombine, but extra fmul insts
   11008       // may have been generated during lowering.
   11009       SDValue N00 = N0.getOperand(0);
   11010       SDValue N01 = N0.getOperand(1);
   11011       auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
   11012       auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
   11013       auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
   11014 
   11015       // Check 1: Make sure that the first operand of the inner multiply is NOT
   11016       // a constant. Otherwise, we may induce infinite looping.
   11017       if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
   11018         // Check 2: Make sure that the second operand of the inner multiply and
   11019         // the second operand of the outer multiply are constants.
   11020         if ((N1CFP && isConstOrConstSplatFP(N01)) ||
   11021             (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
   11022           SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
   11023           return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
   11024         }
   11025       }
   11026     }
   11027 
   11028     // Match a special case: X * 2.0 has already been converted into
   11029     // (fadd X, X), so fold fmul (fadd X, X), C -> fmul X, 2.0 * C.
   11030     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
   11031         N0.getOperand(0) == N0.getOperand(1)) {
   11032       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
   11033       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
   11034       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
   11035     }
   11036   }
   11037 
   11038   // fold (fmul X, 2.0) -> (fadd X, X)
   11039   if (N1CFP && N1CFP->isExactlyValue(+2.0))
   11040     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
   11041 
   11042   // fold (fmul X, -1.0) -> (fneg X)
   11043   if (N1CFP && N1CFP->isExactlyValue(-1.0))
   11044     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
   11045       return DAG.getNode(ISD::FNEG, DL, VT, N0);
   11046 
   11047   // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
   11048   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
   11049     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
   11050       // Both can be negated for free, check to see if at least one is cheaper
   11051       // negated.
   11052       if (LHSNeg == 2 || RHSNeg == 2)
   11053         return DAG.getNode(ISD::FMUL, DL, VT,
   11054                            GetNegatedExpression(N0, DAG, LegalOperations),
   11055                            GetNegatedExpression(N1, DAG, LegalOperations),
   11056                            Flags);
   11057     }
   11058   }
   11059 
   11060   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
   11061   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
   11062   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
   11063       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
   11064       TLI.isOperationLegal(ISD::FABS, VT)) {
   11065     SDValue Select = N0, X = N1;
   11066     if (Select.getOpcode() != ISD::SELECT)
   11067       std::swap(Select, X);
   11068 
   11069     SDValue Cond = Select.getOperand(0);
   11070     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
   11071     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
   11072 
   11073     if (TrueOpnd && FalseOpnd &&
   11074         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
   11075         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
   11076         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
   11077       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
   11078       switch (CC) {
   11079       default: break;
   11080       case ISD::SETOLT:
   11081       case ISD::SETULT:
   11082       case ISD::SETOLE:
   11083       case ISD::SETULE:
   11084       case ISD::SETLT:
   11085       case ISD::SETLE:
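                 // A less-than compare selects its operands in the opposite order,
                 // so swap TrueOpnd and FalseOpnd and fall through to the
                 // greater-than handling below.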
   11086         std::swap(TrueOpnd, FalseOpnd);
   11087         LLVM_FALLTHROUGH;
   11088       case ISD::SETOGT:
   11089       case ISD::SETUGT:
   11090       case ISD::SETOGE:
   11091       case ISD::SETUGE:
   11092       case ISD::SETGT:
   11093       case ISD::SETGE:
   11094         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
   11095             TLI.isOperationLegal(ISD::FNEG, VT))
   11096           return DAG.getNode(ISD::FNEG, DL, VT,
   11097                    DAG.getNode(ISD::FABS, DL, VT, X));
   11098         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
   11099           return DAG.getNode(ISD::FABS, DL, VT, X);
   11100 
   11101         break;
   11102       }
   11103     }
   11104   }
   11105 
   11106   // FMUL -> FMA combines:
   11107   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
   11108     AddToWorklist(Fused.getNode());
   11109     return Fused;
   11110   }
   11111 
   11112   return SDValue();
   11113 }
   11114 
   11115 SDValue DAGCombiner::visitFMA(SDNode *N) {
   11116   SDValue N0 = N->getOperand(0);
   11117   SDValue N1 = N->getOperand(1);
   11118   SDValue N2 = N->getOperand(2);
   11119   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   11120   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   11121   EVT VT = N->getValueType(0);
   11122   SDLoc DL(N);
   11123   const TargetOptions &Options = DAG.getTarget().Options;
   11124 
   11125   // FMA nodes have flags that propagate to the created nodes.
   11126   const SDNodeFlags Flags = N->getFlags();
   11127   bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
   11128 
   11129   // Constant fold FMA.
   11130   if (isa<ConstantFPSDNode>(N0) &&
   11131       isa<ConstantFPSDNode>(N1) &&
   11132       isa<ConstantFPSDNode>(N2)) {
   11133     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
   11134   }
   11135 
   11136   if (UnsafeFPMath) {
   11137     if (N0CFP && N0CFP->isZero())
   11138       return N2;
   11139     if (N1CFP && N1CFP->isZero())
   11140       return N2;
   11141   }
   11142   // TODO: The FMA node should have flags that propagate to these nodes.
   11143   if (N0CFP && N0CFP->isExactlyValue(1.0))
   11144     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
   11145   if (N1CFP && N1CFP->isExactlyValue(1.0))
   11146     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
   11147 
   11148   // Canonicalize (fma c, x, y) -> (fma x, c, y)
   11149   if (isConstantFPBuildVectorOrConstantFP(N0) &&
   11150      !isConstantFPBuildVectorOrConstantFP(N1))
   11151     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
   11152 
   11153   if (UnsafeFPMath) {
   11154     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
   11155     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
   11156         isConstantFPBuildVectorOrConstantFP(N1) &&
   11157         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
   11158       return DAG.getNode(ISD::FMUL, DL, VT, N0,
   11159                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
   11160                                      Flags), Flags);
   11161     }
   11162 
   11163     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
   11164     if (N0.getOpcode() == ISD::FMUL &&
   11165         isConstantFPBuildVectorOrConstantFP(N1) &&
   11166         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
   11167       return DAG.getNode(ISD::FMA, DL, VT,
   11168                          N0.getOperand(0),
   11169                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
   11170                                      Flags),
   11171                          N2);
   11172     }
   11173   }
   11174 
   11175   // (fma x, 1, y) -> (fadd x, y)
   11176   // (fma x, -1, y) -> (fadd (fneg x), y)
   11177   if (N1CFP) {
   11178     if (N1CFP->isExactlyValue(1.0))
   11179       // TODO: The FMA node should have flags that propagate to this node.
   11180       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
   11181 
   11182     if (N1CFP->isExactlyValue(-1.0) &&
   11183         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
   11184       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
   11185       AddToWorklist(RHSNeg.getNode());
   11186       // TODO: The FMA node should have flags that propagate to this node.
   11187       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
   11188     }
   11189 
   11190     // fma (fneg x), K, y -> fma x, -K, y
   11191     if (N0.getOpcode() == ISD::FNEG &&
   11192         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
   11193          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
   11194       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
   11195                          DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
   11196     }
   11197   }
   11198 
   11199   if (UnsafeFPMath) {
   11200     // (fma x, c, x) -> (fmul x, (c+1))
   11201     if (N1CFP && N0 == N2) {
   11202       return DAG.getNode(ISD::FMUL, DL, VT, N0,
   11203                          DAG.getNode(ISD::FADD, DL, VT, N1,
   11204                                      DAG.getConstantFP(1.0, DL, VT), Flags),
   11205                          Flags);
   11206     }
   11207 
   11208     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
   11209     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
   11210       return DAG.getNode(ISD::FMUL, DL, VT, N0,
   11211                          DAG.getNode(ISD::FADD, DL, VT, N1,
   11212                                      DAG.getConstantFP(-1.0, DL, VT), Flags),
   11213                          Flags);
   11214     }
   11215   }
   11216 
   11217   return SDValue();
   11218 }
   11219 
   11220 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
   11221 // reciprocal.
   11222 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
   11223 // Notice that this is not always beneficial. One reason is that different
   11224 // targets may have different costs for FDIV and FMUL, so the cost of two
   11225 // FDIVs may be lower than that of one FDIV and two FMULs. Another reason is
   11226 // that the critical path grows from "one FDIV" to "one FDIV + one FMUL".
   11227 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
   11228   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
   11229   const SDNodeFlags Flags = N->getFlags();
   11230   if (!UnsafeMath && !Flags.hasAllowReciprocal())
   11231     return SDValue();
   11232 
   11233   // Skip if current node is a reciprocal.
   11234   SDValue N0 = N->getOperand(0);
   11235   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   11236   if (N0CFP && N0CFP->isExactlyValue(1.0))
   11237     return SDValue();
   11238 
   11239   // Exit early if the target does not want this transform or if there can't
   11240   // possibly be enough uses of the divisor to make the transform worthwhile.
   11241   SDValue N1 = N->getOperand(1);
   11242   unsigned MinUses = TLI.combineRepeatedFPDivisors();
   11243   if (!MinUses || N1->use_size() < MinUses)
   11244     return SDValue();
   11245 
   11246   // Find all FDIV users of the same divisor.
   11247   // Use a set because duplicates may be present in the user list.
   11248   SetVector<SDNode *> Users;
   11249   for (auto *U : N1->uses()) {
   11250     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
   11251       // This division is eligible for optimization only if global unsafe math
   11252       // is enabled or if this division allows reciprocal formation.
   11253       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
   11254         Users.insert(U);
   11255     }
   11256   }
   11257 
   11258   // Now that we have the actual number of divisor uses, make sure it meets
   11259   // the minimum threshold specified by the target.
   11260   if (Users.size() < MinUses)
   11261     return SDValue();
   11262 
   11263   EVT VT = N->getValueType(0);
   11264   SDLoc DL(N);
   11265   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
   11266   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
   11267 
   11268   // Dividend / Divisor -> Dividend * Reciprocal
   11269   for (auto *U : Users) {
   11270     SDValue Dividend = U->getOperand(0);
   11271     if (Dividend != FPOne) {
   11272       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
   11273                                     Reciprocal, Flags);
   11274       CombineTo(U, NewNode);
   11275     } else if (U != Reciprocal.getNode()) {
   11276       // In the absence of fast-math-flags, this user node is always the
   11277       // same node as Reciprocal, but with FMF they may be different nodes.
   11278       CombineTo(U, Reciprocal);
   11279     }
   11280   }
   11281   return SDValue(N, 0);  // N was replaced.
   11282 }
   11283 
   11284 SDValue DAGCombiner::visitFDIV(SDNode *N) {
   11285   SDValue N0 = N->getOperand(0);
   11286   SDValue N1 = N->getOperand(1);
   11287   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   11288   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   11289   EVT VT = N->getValueType(0);
   11290   SDLoc DL(N);
   11291   const TargetOptions &Options = DAG.getTarget().Options;
   11292   SDNodeFlags Flags = N->getFlags();
   11293 
   11294   // fold vector ops
   11295   if (VT.isVector())
   11296     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   11297       return FoldedVOp;
   11298 
   11299   // fold (fdiv c1, c2) -> c1/c2
   11300   if (N0CFP && N1CFP)
   11301     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
   11302 
   11303   if (SDValue NewSel = foldBinOpIntoSelect(N))
   11304     return NewSel;
   11305 
   11306   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
   11307     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
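             // E.g., (fdiv X, 4.0) -> (fmul X, 0.25); here the reciprocal is exact,
             // so the APFloat status check below reports opOK.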
   11308     if (N1CFP) {
   11309       // Compute the reciprocal 1.0 / c2.
   11310       const APFloat &N1APF = N1CFP->getValueAPF();
   11311       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
   11312       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
   11313       // Only do the transform if the reciprocal is a legal fp immediate that
   11314       // isn't too nasty (e.g., NaN, denormal, ...).
   11315       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
   11316           (!LegalOperations ||
   11317            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
   11318            // backend)... we should handle this gracefully after Legalize.
   11319            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
   11320            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
   11321            TLI.isFPImmLegal(Recip, VT)))
   11322         return DAG.getNode(ISD::FMUL, DL, VT, N0,
   11323                            DAG.getConstantFP(Recip, DL, VT), Flags);
   11324     }
   11325 
   11326     // If this FDIV is part of a reciprocal square root, it may be folded
   11327     // into a target-specific square root estimate instruction.
   11328     if (N1.getOpcode() == ISD::FSQRT) {
   11329       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
   11330         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
   11331       }
   11332     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
   11333                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
   11334       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
   11335                                           Flags)) {
   11336         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
   11337         AddToWorklist(RV.getNode());
   11338         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
   11339       }
   11340     } else if (N1.getOpcode() == ISD::FP_ROUND &&
   11341                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
   11342       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
   11343                                           Flags)) {
   11344         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
   11345         AddToWorklist(RV.getNode());
   11346         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
   11347       }
   11348     } else if (N1.getOpcode() == ISD::FMUL) {
   11349       // Look through an FMUL. Even though this won't remove the FDIV directly,
   11350       // it's still worthwhile to get rid of the FSQRT if possible.
   11351       SDValue SqrtOp;
   11352       SDValue OtherOp;
   11353       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
   11354         SqrtOp = N1.getOperand(0);
   11355         OtherOp = N1.getOperand(1);
   11356       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
   11357         SqrtOp = N1.getOperand(1);
   11358         OtherOp = N1.getOperand(0);
   11359       }
   11360       if (SqrtOp.getNode()) {
   11361         // We found a FSQRT, so try to make this fold:
   11362         // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
   11363         if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
   11364           RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
   11365           AddToWorklist(RV.getNode());
   11366           return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
   11367         }
   11368       }
   11369     }
   11370 
   11371     // Fold into a reciprocal estimate and multiply instead of a real divide.
   11372     if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
   11373       AddToWorklist(RV.getNode());
   11374       return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
   11375     }
   11376   }
   11377 
   11378   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
   11379   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
   11380     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
   11381       // Both can be negated for free, check to see if at least one is cheaper
   11382       // negated.
   11383       if (LHSNeg == 2 || RHSNeg == 2)
   11384         return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
   11385                            GetNegatedExpression(N0, DAG, LegalOperations),
   11386                            GetNegatedExpression(N1, DAG, LegalOperations),
   11387                            Flags);
   11388     }
   11389   }
   11390 
   11391   if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
   11392     return CombineRepeatedDivisors;
   11393 
   11394   return SDValue();
   11395 }
   11396 
   11397 SDValue DAGCombiner::visitFREM(SDNode *N) {
   11398   SDValue N0 = N->getOperand(0);
   11399   SDValue N1 = N->getOperand(1);
   11400   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   11401   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   11402   EVT VT = N->getValueType(0);
   11403 
   11404   // fold (frem c1, c2) -> fmod(c1,c2)
   11405   if (N0CFP && N1CFP)
   11406     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
   11407 
   11408   if (SDValue NewSel = foldBinOpIntoSelect(N))
   11409     return NewSel;
   11410 
   11411   return SDValue();
   11412 }
   11413 
   11414 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
   11415   SDNodeFlags Flags = N->getFlags();
   11416   if (!DAG.getTarget().Options.UnsafeFPMath &&
   11417       !Flags.hasApproximateFuncs())
   11418     return SDValue();
   11419 
   11420   SDValue N0 = N->getOperand(0);
   11421   if (TLI.isFsqrtCheap(N0, DAG))
   11422     return SDValue();
   11423 
   11424   // FSQRT nodes have flags that propagate to the created nodes.
   11425   return buildSqrtEstimate(N0, Flags);
   11426 }
   11427 
   11428 /// copysign(x, fp_extend(y)) -> copysign(x, y)
   11429 /// copysign(x, fp_round(y)) -> copysign(x, y)
   11430 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
   11431   SDValue N1 = N->getOperand(1);
   11432   if ((N1.getOpcode() == ISD::FP_EXTEND ||
   11433        N1.getOpcode() == ISD::FP_ROUND)) {
   11434     // Do not optimize out the type conversion for f128 yet.
   11435     // On some targets, such as x86_64, the configuration keeps an f128
   11436     // value in a single SSE register, but instruction selection cannot
   11437     // yet handle FCOPYSIGN on SSE registers.
   11438     EVT N1VT = N1->getValueType(0);
   11439     EVT N1Op0VT = N1->getOperand(0).getValueType();
   11440     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
   11441   }
   11442   return false;
   11443 }
   11444 
   11445 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
   11446   SDValue N0 = N->getOperand(0);
   11447   SDValue N1 = N->getOperand(1);
   11448   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   11449   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   11450   EVT VT = N->getValueType(0);
   11451 
   11452   if (N0CFP && N1CFP) // Constant fold
   11453     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
   11454 
   11455   if (N1CFP) {
   11456     const APFloat &V = N1CFP->getValueAPF();
   11457     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
   11458     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
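             // E.g., copysign(x, 2.0) == fabs(x), and copysign(x, -0.5) ==
             // fneg(fabs(x)).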
   11459     if (!V.isNegative()) {
   11460       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
   11461         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
   11462     } else {
   11463       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
   11464         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
   11465                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
   11466     }
   11467   }
   11468 
   11469   // copysign(fabs(x), y) -> copysign(x, y)
   11470   // copysign(fneg(x), y) -> copysign(x, y)
   11471   // copysign(copysign(x,z), y) -> copysign(x, y)
   11472   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
   11473       N0.getOpcode() == ISD::FCOPYSIGN)
   11474     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
   11475 
   11476   // copysign(x, abs(y)) -> abs(x)
   11477   if (N1.getOpcode() == ISD::FABS)
   11478     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
   11479 
   11480   // copysign(x, copysign(y,z)) -> copysign(x, z)
   11481   if (N1.getOpcode() == ISD::FCOPYSIGN)
   11482     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
   11483 
   11484   // copysign(x, fp_extend(y)) -> copysign(x, y)
   11485   // copysign(x, fp_round(y)) -> copysign(x, y)
   11486   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
   11487     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
   11488 
   11489   return SDValue();
   11490 }
   11491 
   11492 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
   11493                                const TargetLowering &TLI) {
   11494   // This optimization is guarded by a function attribute because it may produce
   11495   // unexpected results. I.e., programs may be relying on the platform-specific
   11496   // undefined behavior when the float-to-int conversion overflows.
   11497   const Function &F = DAG.getMachineFunction().getFunction();
   11498   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
   11499   if (StrictOverflow.getValueAsString().equals("false"))
   11500     return SDValue();
   11501 
   11502   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
   11503   // replacing casts with a libcall. We also must be allowed to ignore -0.0,
   11504   // because FTRUNC returns -0.0 for inputs in (-1.0, -0.0), whereas the
   11505   // integer conversions would return +0.0.
   11506   // FIXME: We should be able to use node-level FMF here.
   11507   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
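           // Illustrative case: ftrunc(-0.5) == -0.0, while the round trip
           // (sitofp (fptosi -0.5)) == +0.0; hence the no-signed-zeros gate below.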
   11508   EVT VT = N->getValueType(0);
   11509   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
   11510       !DAG.getTarget().Options.NoSignedZerosFPMath)
   11511     return SDValue();
   11512 
   11513   // fptosi/fptoui round towards zero, so converting from FP to integer and
   11514   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
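           // E.g., (sitofp (fptosi -3.7)) == -3.0 == ftrunc(-3.7), given the
           // no-overflow assumption guarded by the attribute check above.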
   11515   SDValue N0 = N->getOperand(0);
   11516   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
   11517       N0.getOperand(0).getValueType() == VT)
   11518     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
   11519 
   11520   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
   11521       N0.getOperand(0).getValueType() == VT)
   11522     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
   11523 
   11524   return SDValue();
   11525 }
   11526 
   11527 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
   11528   SDValue N0 = N->getOperand(0);
   11529   EVT VT = N->getValueType(0);
   11530   EVT OpVT = N0.getValueType();
   11531 
   11532   // fold (sint_to_fp c1) -> c1fp
   11533   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
   11534       // ...but only if the target supports immediate floating-point values
   11535       (!LegalOperations ||
   11536        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
   11537     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
   11538 
   11539   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
   11540   // but UINT_TO_FP is legal on this target, try to convert.
   11541   if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
   11542       TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
   11543     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
   11544     if (DAG.SignBitIsZero(N0))
   11545       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
   11546   }
   11547 
   11548   // The next optimizations are desirable only if SELECT_CC can be lowered.
   11549   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
   11550     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
   11551     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
   11552         !VT.isVector() &&
   11553         (!LegalOperations ||
   11554          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
   11555       SDLoc DL(N);
   11556       SDValue Ops[] =
   11557         { N0.getOperand(0), N0.getOperand(1),
   11558           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
   11559           N0.getOperand(2) };
   11560       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
   11561     }
   11562 
   11563     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
   11564     //      (select_cc x, y, 1.0, 0.0, cc)
   11565     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
   11566         N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
   11567         (!LegalOperations ||
   11568          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
   11569       SDLoc DL(N);
   11570       SDValue Ops[] =
   11571         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
   11572           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
   11573           N0.getOperand(0).getOperand(2) };
   11574       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
   11575     }
   11576   }
   11577 
   11578   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
   11579     return FTrunc;
   11580 
   11581   return SDValue();
   11582 }
   11583 
   11584 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
   11585   SDValue N0 = N->getOperand(0);
   11586   EVT VT = N->getValueType(0);
   11587   EVT OpVT = N0.getValueType();
   11588 
   11589   // fold (uint_to_fp c1) -> c1fp
   11590   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
   11591       // ...but only if the target supports immediate floating-point values
   11592       (!LegalOperations ||
   11593        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
   11594     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
   11595 
   11596   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
   11597   // but SINT_TO_FP is legal on this target, try to convert.
   11598   if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
   11599       TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
   11600     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
   11601     if (DAG.SignBitIsZero(N0))
   11602       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
   11603   }
   11604 
   11605   // The next optimizations are desirable only if SELECT_CC can be lowered.
   11606   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
   11607     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
   11608     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
   11609         (!LegalOperations ||
   11610          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
   11611       SDLoc DL(N);
   11612       SDValue Ops[] =
   11613         { N0.getOperand(0), N0.getOperand(1),
   11614           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
   11615           N0.getOperand(2) };
   11616       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
   11617     }
   11618   }
   11619 
   11620   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
   11621     return FTrunc;
   11622 
   11623   return SDValue();
   11624 }
   11625 
   11626 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
   11627 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
   11628   SDValue N0 = N->getOperand(0);
   11629   EVT VT = N->getValueType(0);
   11630 
   11631   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
   11632     return SDValue();
   11633 
   11634   SDValue Src = N0.getOperand(0);
   11635   EVT SrcVT = Src.getValueType();
   11636   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
   11637   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
   11638 
   11639   // We can safely assume the conversion won't overflow the output range,
   11640   // because (for example) (uint8_t)18293.f is undefined behavior.
   11641 
   11642   // Since we can assume the conversion won't overflow, our decision as to
   11643   // whether the input will fit in the float should depend on the minimum
   11644   // of the input range and output range.
   11645 
   11646   // This means this is also safe for a signed input and unsigned output, since
   11647   // a negative input would lead to undefined behavior.
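           // Worked example (illustrative): for i16 -> f32 -> i32 with signed
           // conversions, InputSize = 15, OutputSize = 31, ActualSize = 15, and
           // f32's 24 bits of precision cover it, so this folds to a sign_extend.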
   11648   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
   11649   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
   11650   unsigned ActualSize = std::min(InputSize, OutputSize);
   11651   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
   11652 
   11653   // We can only fold away the float conversion if the input range can be
   11654   // represented exactly in the float range.
   11655   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
   11656     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
   11657       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
   11658                                                        : ISD::ZERO_EXTEND;
   11659       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
   11660     }
   11661     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
   11662       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
   11663     return DAG.getBitcast(VT, Src);
   11664   }
   11665   return SDValue();
   11666 }
   11667 
   11668 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
   11669   SDValue N0 = N->getOperand(0);
   11670   EVT VT = N->getValueType(0);
   11671 
   11672   // fold (fp_to_sint c1fp) -> c1
   11673   if (isConstantFPBuildVectorOrConstantFP(N0))
   11674     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
   11675 
   11676   return FoldIntToFPToInt(N, DAG);
   11677 }
   11678 
   11679 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
   11680   SDValue N0 = N->getOperand(0);
   11681   EVT VT = N->getValueType(0);
   11682 
   11683   // fold (fp_to_uint c1fp) -> c1
   11684   if (isConstantFPBuildVectorOrConstantFP(N0))
   11685     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
   11686 
   11687   return FoldIntToFPToInt(N, DAG);
   11688 }
   11689 
   11690 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
   11691   SDValue N0 = N->getOperand(0);
   11692   SDValue N1 = N->getOperand(1);
   11693   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   11694   EVT VT = N->getValueType(0);
   11695 
   11696   // fold (fp_round c1fp) -> c1fp
   11697   if (N0CFP)
   11698     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
   11699 
   11700   // fold (fp_round (fp_extend x)) -> x
   11701   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
   11702     return N0.getOperand(0);
   11703 
   11704   // fold (fp_round (fp_round x)) -> (fp_round x)
   11705   if (N0.getOpcode() == ISD::FP_ROUND) {
   11706     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
   11707     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
   11708 
   11709     // Skip this folding if it results in an fp_round from f80 to f16.
   11710     //
   11711     // f80 to f16 always generates an expensive (and as yet, unimplemented)
   11712     // libcall to __truncxfhf2 instead of selecting native f16 conversion
   11713     // instructions from f32 or f64.  Moreover, the first (value-preserving)
   11714     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
   11715     // x86.
   11716     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
   11717       return SDValue();
   11718 
   11719     // If the first fp_round isn't a value-preserving truncation, it might
   11720     // introduce a tie in the second fp_round that wouldn't occur in the
   11721     // single-step fp_round we want to fold to.
   11722     // In other words, double rounding isn't the same as rounding once.
   11723     // Also, this is a value-preserving truncation iff both fp_rounds are.
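             // Illustrative tie (f64 -> f32 -> f16 vs. f64 -> f16): for
             // x = 1 + 2^-11 + 2^-25, rounding straight to f16 gives 1 + 2^-10,
             // but rounding to f32 first yields 1 + 2^-11, which then ties to 1.0.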
   11724     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
   11725       SDLoc DL(N);
   11726       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
   11727                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
   11728     }
   11729   }
   11730 
   11731   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
   11732   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
   11733     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
   11734                               N0.getOperand(0), N1);
   11735     AddToWorklist(Tmp.getNode());
   11736     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
   11737                        Tmp, N0.getOperand(1));
   11738   }
   11739 
   11740   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
   11741     return NewVSel;
   11742 
   11743   return SDValue();
   11744 }
   11745 
   11746 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
   11747   SDValue N0 = N->getOperand(0);
   11748   EVT VT = N->getValueType(0);
   11749   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
   11750   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   11751 
   11752   // fold (fp_round_inreg c1fp) -> c1fp
   11753   if (N0CFP && isTypeLegal(EVT)) {
   11754     SDLoc DL(N);
   11755     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
   11756     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
   11757   }
   11758 
   11759   return SDValue();
   11760 }
   11761 
   11762 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
   11763   SDValue N0 = N->getOperand(0);
   11764   EVT VT = N->getValueType(0);
   11765 
   11766   // If our only use is an fp_round, don't fold here; let the fp_round fold the pair.
   11767   if (N->hasOneUse() &&
   11768       N->use_begin()->getOpcode() == ISD::FP_ROUND)
   11769     return SDValue();
   11770 
   11771   // fold (fp_extend c1fp) -> c1fp
   11772   if (isConstantFPBuildVectorOrConstantFP(N0))
   11773     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
   11774 
   11775   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
   11776   if (N0.getOpcode() == ISD::FP16_TO_FP &&
   11777       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
   11778     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
   11779 
   11780   // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
   11781   // value of X.
   11782   if (N0.getOpcode() == ISD::FP_ROUND
   11783       && N0.getConstantOperandVal(1) == 1) {
   11784     SDValue In = N0.getOperand(0);
   11785     if (In.getValueType() == VT) return In;
   11786     if (VT.bitsLT(In.getValueType()))
   11787       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
   11788                          In, N0.getOperand(1));
   11789     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
   11790   }
   11791 
   11792   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
   11793   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
   11794        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
   11795     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   11796     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
   11797                                      LN0->getChain(),
   11798                                      LN0->getBasePtr(), N0.getValueType(),
   11799                                      LN0->getMemOperand());
   11800     CombineTo(N, ExtLoad);
   11801     CombineTo(N0.getNode(),
   11802               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
   11803                           N0.getValueType(), ExtLoad,
   11804                           DAG.getIntPtrConstant(1, SDLoc(N0))),
   11805               ExtLoad.getValue(1));
   11806     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   11807   }
   11808 
   11809   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
   11810     return NewVSel;
   11811 
   11812   return SDValue();
   11813 }
   11814 
   11815 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
   11816   SDValue N0 = N->getOperand(0);
   11817   EVT VT = N->getValueType(0);
   11818 
   11819   // fold (fceil c1) -> fceil(c1)
   11820   if (isConstantFPBuildVectorOrConstantFP(N0))
   11821     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
   11822 
   11823   return SDValue();
   11824 }
   11825 
   11826 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
   11827   SDValue N0 = N->getOperand(0);
   11828   EVT VT = N->getValueType(0);
   11829 
   11830   // fold (ftrunc c1) -> ftrunc(c1)
   11831   if (isConstantFPBuildVectorOrConstantFP(N0))
   11832     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
   11833 
   11834   // fold ftrunc (known rounded int x) -> x
   11835   // ftrunc is part of the fptosi/fptoui expansion on some targets, so it is
   11836   // likely to be generated when extracting an integer from a rounded FP value.
   11837   switch (N0.getOpcode()) {
   11838   default: break;
   11839   case ISD::FRINT:
   11840   case ISD::FTRUNC:
   11841   case ISD::FNEARBYINT:
   11842   case ISD::FFLOOR:
   11843   case ISD::FCEIL:
   11844     return N0;
   11845   }
   11846 
   11847   return SDValue();
   11848 }
   11849 
   11850 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
   11851   SDValue N0 = N->getOperand(0);
   11852   EVT VT = N->getValueType(0);
   11853 
   11854   // fold (ffloor c1) -> ffloor(c1)
   11855   if (isConstantFPBuildVectorOrConstantFP(N0))
   11856     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
   11857 
   11858   return SDValue();
   11859 }
   11860 
   11861 // FIXME: FNEG and FABS have a lot in common; refactor.
   11862 SDValue DAGCombiner::visitFNEG(SDNode *N) {
   11863   SDValue N0 = N->getOperand(0);
   11864   EVT VT = N->getValueType(0);
   11865 
   11866   // Constant fold FNEG.
   11867   if (isConstantFPBuildVectorOrConstantFP(N0))
   11868     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
   11869 
   11870   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
   11871                          &DAG.getTarget().Options))
   11872     return GetNegatedExpression(N0, DAG, LegalOperations);
   11873 
   11874   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
   11875   // constant pool values.
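           // E.g., for an f32 scalar this becomes
           //   bitcast<f32>(bitcast<i32>(x) ^ 0x80000000),
           // flipping only the sign bit.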
   11876   if (!TLI.isFNegFree(VT) &&
   11877       N0.getOpcode() == ISD::BITCAST &&
   11878       N0.getNode()->hasOneUse()) {
   11879     SDValue Int = N0.getOperand(0);
   11880     EVT IntVT = Int.getValueType();
   11881     if (IntVT.isInteger() && !IntVT.isVector()) {
   11882       APInt SignMask;
   11883       if (N0.getValueType().isVector()) {
   11884         // For a vector, get a mask such as 0x80... per scalar element
   11885         // and splat it.
   11886         SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
   11887         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
   11888       } else {
   11889         // For a scalar, just generate 0x80...
   11890         SignMask = APInt::getSignMask(IntVT.getSizeInBits());
   11891       }
   11892       SDLoc DL0(N0);
   11893       Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
   11894                         DAG.getConstant(SignMask, DL0, IntVT));
   11895       AddToWorklist(Int.getNode());
   11896       return DAG.getBitcast(VT, Int);
   11897     }
   11898   }
   11899 
   11900   // (fneg (fmul x, c)) -> (fmul x, -c)
   11901   if (N0.getOpcode() == ISD::FMUL &&
   11902       (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
   11903     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
   11904     if (CFP1) {
   11905       APFloat CVal = CFP1->getValueAPF();
   11906       CVal.changeSign();
   11907       if (Level >= AfterLegalizeDAG &&
   11908           (TLI.isFPImmLegal(CVal, VT) ||
   11909            TLI.isOperationLegal(ISD::ConstantFP, VT)))
   11910         return DAG.getNode(
   11911             ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
   11912             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
   11913             N0->getFlags());
   11914     }
   11915   }
   11916 
   11917   return SDValue();
   11918 }
   11919 
   11920 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
   11921   SDValue N0 = N->getOperand(0);
   11922   SDValue N1 = N->getOperand(1);
   11923   EVT VT = N->getValueType(0);
   11924   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
   11925   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
   11926 
   11927   if (N0CFP && N1CFP) {
   11928     const APFloat &C0 = N0CFP->getValueAPF();
   11929     const APFloat &C1 = N1CFP->getValueAPF();
   11930     return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
   11931   }
   11932 
   11933   // Canonicalize to constant on RHS.
   11934   if (isConstantFPBuildVectorOrConstantFP(N0) &&
   11935      !isConstantFPBuildVectorOrConstantFP(N1))
   11936     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
   11937 
   11938   return SDValue();
   11939 }
   11940 
   11941 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
   11942   SDValue N0 = N->getOperand(0);
   11943   SDValue N1 = N->getOperand(1);
   11944   EVT VT = N->getValueType(0);
   11945   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
   11946   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
   11947 
   11948   if (N0CFP && N1CFP) {
   11949     const APFloat &C0 = N0CFP->getValueAPF();
   11950     const APFloat &C1 = N1CFP->getValueAPF();
   11951     return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
   11952   }
   11953 
   11954   // Canonicalize to constant on RHS.
   11955   if (isConstantFPBuildVectorOrConstantFP(N0) &&
   11956      !isConstantFPBuildVectorOrConstantFP(N1))
   11957     return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
   11958 
   11959   return SDValue();
   11960 }
   11961 
   11962 SDValue DAGCombiner::visitFABS(SDNode *N) {
   11963   SDValue N0 = N->getOperand(0);
   11964   EVT VT = N->getValueType(0);
   11965 
   11966   // fold (fabs c1) -> fabs(c1)
   11967   if (isConstantFPBuildVectorOrConstantFP(N0))
   11968     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
   11969 
   11970   // fold (fabs (fabs x)) -> (fabs x)
   11971   if (N0.getOpcode() == ISD::FABS)
   11972     return N->getOperand(0);
   11973 
   11974   // fold (fabs (fneg x)) -> (fabs x)
   11975   // fold (fabs (fcopysign x, y)) -> (fabs x)
   11976   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
   11977     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
   11978 
   11979   // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
   11980   // constant pool values.
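           // E.g., for an f32 scalar this becomes
           //   bitcast<f32>(bitcast<i32>(x) & 0x7fffffff),
           // clearing the sign bit.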
   11981   if (!TLI.isFAbsFree(VT) &&
   11982       N0.getOpcode() == ISD::BITCAST &&
   11983       N0.getNode()->hasOneUse()) {
   11984     SDValue Int = N0.getOperand(0);
   11985     EVT IntVT = Int.getValueType();
   11986     if (IntVT.isInteger() && !IntVT.isVector()) {
   11987       APInt SignMask;
   11988       if (N0.getValueType().isVector()) {
   11989         // For a vector, get a mask such as 0x7f... per scalar element
   11990         // and splat it.
   11991         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
   11992         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
   11993       } else {
   11994         // For a scalar, just generate 0x7f...
   11995         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
   11996       }
   11997       SDLoc DL(N0);
   11998       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
   11999                         DAG.getConstant(SignMask, DL, IntVT));
   12000       AddToWorklist(Int.getNode());
   12001       return DAG.getBitcast(N->getValueType(0), Int);
   12002     }
   12003   }
   12004 
   12005   return SDValue();
   12006 }
   12007 
   12008 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
   12009   SDValue Chain = N->getOperand(0);
   12010   SDValue N1 = N->getOperand(1);
   12011   SDValue N2 = N->getOperand(2);
   12012 
   12013   // If N is a constant we could fold this into a fallthrough or unconditional
   12014   // branch. However, that doesn't happen very often in normal code, because
   12015   // Instcombine/SimplifyCFG should have handled the available opportunities.
   12016   // If we did this folding here, it would be necessary to update the
   12017   // MachineBasicBlock CFG, which is awkward.
   12018 
   12019   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
   12020   // on the target.
   12021   if (N1.getOpcode() == ISD::SETCC &&
   12022       TLI.isOperationLegalOrCustom(ISD::BR_CC,
   12023                                    N1.getOperand(0).getValueType())) {
   12024     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
   12025                        Chain, N1.getOperand(2),
   12026                        N1.getOperand(0), N1.getOperand(1), N2);
   12027   }
   12028 
   12029   if (N1.hasOneUse()) {
   12030     if (SDValue NewN1 = rebuildSetCC(N1))
   12031       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
   12032   }
   12033 
   12034   return SDValue();
   12035 }
   12036 
   12037 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
   12038   if (N.getOpcode() == ISD::SRL ||
   12039       (N.getOpcode() == ISD::TRUNCATE &&
   12040        (N.getOperand(0).hasOneUse() &&
   12041         N.getOperand(0).getOpcode() == ISD::SRL))) {
   12042     // Look past the truncate.
   12043     if (N.getOpcode() == ISD::TRUNCATE)
   12044       N = N.getOperand(0);
   12045 
   12046     // Match this pattern so that we can generate simpler code:
   12047     //
   12048     //   %a = ...
   12049     //   %b = and i32 %a, 2
   12050     //   %c = srl i32 %b, 1
   12051     //   brcond i32 %c ...
   12052     //
   12053     // into
   12054     //
   12055     //   %a = ...
   12056     //   %b = and i32 %a, 2
   12057     //   %c = setcc eq %b, 0
   12058     //   brcond %c ...
   12059     //
   12060     // This applies only when the AND constant value has one bit set and the
   12061     // SRL constant is equal to the log2 of the AND constant. The back-end is
   12062     // smart enough to convert the result into a TEST/JMP sequence.
   12063     SDValue Op0 = N.getOperand(0);
   12064     SDValue Op1 = N.getOperand(1);
   12065 
   12066     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
   12067       SDValue AndOp1 = Op0.getOperand(1);
   12068 
   12069       if (AndOp1.getOpcode() == ISD::Constant) {
   12070         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
   12071 
   12072         if (AndConst.isPowerOf2() &&
   12073             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
   12074           SDLoc DL(N);
   12075           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
   12076                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
   12077                               ISD::SETNE);
   12078         }
   12079       }
   12080     }
   12081   }
   12082 
   12083   // Transform br(xor(x, y)) -> br(x != y)
   12084   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
   12085   if (N.getOpcode() == ISD::XOR) {
   12086     // Because we may call this on a speculatively constructed
   12087     // SimplifiedSetCC Node, we need to simplify this node first.
   12088     // Ideally this should be folded into SimplifySetCC and not
   12089     // here. For now, grab a handle to N so we don't lose it from
   12090     // replacements internal to the visit.
   12091     HandleSDNode XORHandle(N);
   12092     while (N.getOpcode() == ISD::XOR) {
   12093       SDValue Tmp = visitXOR(N.getNode());
   12094       // No simplification done.
   12095       if (!Tmp.getNode())
   12096         break;
   12097       // Returning N signals an in-visit replacement that may have
   12098       // invalidated N. Grab the value from the handle.
   12099       if (Tmp.getNode() == N.getNode())
   12100         N = XORHandle.getValue();
   12101       else // Node simplified. Try simplifying again.
   12102         N = Tmp;
   12103     }
   12104 
   12105     if (N.getOpcode() != ISD::XOR)
   12106       return N;
   12107 
   12108     SDNode *TheXor = N.getNode();
   12109 
   12110     SDValue Op0 = TheXor->getOperand(0);
   12111     SDValue Op1 = TheXor->getOperand(1);
   12112 
   12113     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
   12114       bool Equal = false;
               // Look through (xor (xor x, y), 1) to compare x and y directly:
               // the constant one is canonicalized to operand 1, so check Op1
               // for it and take the values to compare from the inner xor.
   12115       if (isOneConstant(Op1) && Op0.hasOneUse() &&
   12116           Op0.getOpcode() == ISD::XOR) {
   12117         TheXor = Op0.getNode();
                 Op0 = TheXor->getOperand(0);
                 Op1 = TheXor->getOperand(1);
   12118         Equal = true;
   12119       }
   12120 
   12121       EVT SetCCVT = N.getValueType();
   12122       if (LegalTypes)
   12123         SetCCVT = getSetCCResultType(SetCCVT);
   12124       // Replace the uses of XOR with SETCC
   12125       return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
   12126                           Equal ? ISD::SETEQ : ISD::SETNE);
   12127     }
   12128   }
   12129 
   12130   return SDValue();
   12131 }
   12132 
   12133 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
   12134 //
   12135 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
   12136   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
   12137   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
   12138 
   12139   // If N is a constant we could fold this into a fallthrough or unconditional
   12140   // branch. However that doesn't happen very often in normal code, because
   12141   // Instcombine/SimplifyCFG should have handled the available opportunities.
   12142   // If we did this folding here, it would be necessary to update the
   12143   // MachineBasicBlock CFG, which is awkward.
   12144 
   12145   // Use SimplifySetCC to simplify SETCC's.
   12146   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
   12147                                CondLHS, CondRHS, CC->get(), SDLoc(N),
   12148                                false);
   12149   if (Simp.getNode()) AddToWorklist(Simp.getNode());
   12150 
   12151   // fold to a simpler setcc
   12152   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
   12153     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
   12154                        N->getOperand(0), Simp.getOperand(2),
   12155                        Simp.getOperand(0), Simp.getOperand(1),
   12156                        N->getOperand(4));
   12157 
   12158   return SDValue();
   12159 }
   12160 
   12161 /// Return true if 'Use' is a load or a store that uses N as its base pointer
   12162 /// and that N may be folded in the load / store addressing mode.
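         /// For example, on a target with [reg + imm] addressing, N = (add %x, 8)
         /// used as the pointer of (load (add %x, 8)) can typically be folded.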
   12163 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
   12164                                     SelectionDAG &DAG,
   12165                                     const TargetLowering &TLI) {
   12166   EVT VT;
   12167   unsigned AS;
   12168 
   12169   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
   12170     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
   12171       return false;
   12172     VT = LD->getMemoryVT();
   12173     AS = LD->getAddressSpace();
   12174   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
   12175     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
   12176       return false;
   12177     VT = ST->getMemoryVT();
   12178     AS = ST->getAddressSpace();
   12179   } else
   12180     return false;
   12181 
   12182   TargetLowering::AddrMode AM;
   12183   if (N->getOpcode() == ISD::ADD) {
   12184     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
   12185     if (Offset)
   12186       // [reg +/- imm]
   12187       AM.BaseOffs = Offset->getSExtValue();
   12188     else
   12189       // [reg +/- reg]
   12190       AM.Scale = 1;
   12191   } else if (N->getOpcode() == ISD::SUB) {
   12192     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
   12193     if (Offset)
   12194       // [reg +/- imm]
   12195       AM.BaseOffs = -Offset->getSExtValue();
   12196     else
   12197       // [reg +/- reg]
   12198       AM.Scale = 1;
   12199   } else
   12200     return false;
   12201 
   12202   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
   12203                                    VT.getTypeForEVT(*DAG.getContext()), AS);
   12204 }
   12205 
   12206 /// Try turning a load/store into a pre-indexed load/store when the base
   12207 /// pointer is an add or subtract and it has other uses besides the load/store.
   12208 /// After the transformation, the new indexed load/store has effectively folded
   12209 /// the add/subtract in and all of its other uses are redirected to the
   12210 /// new load/store.
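         ///
         /// For example (when the target supports pre-indexing):
         ///   %p2 = add %p, 4
         ///   %v  = load %p2       ; plus other uses of %p2
         /// becomes a pre-incremented load of %p by 4 that also produces the
         /// updated pointer, and the other uses of %p2 are redirected to it.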
   12211 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
   12212   if (Level < AfterLegalizeDAG)
   12213     return false;
   12214 
   12215   bool isLoad = true;
   12216   SDValue Ptr;
   12217   EVT VT;
   12218   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
   12219     if (LD->isIndexed())
   12220       return false;
   12221     VT = LD->getMemoryVT();
   12222     if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
   12223         !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
   12224       return false;
   12225     Ptr = LD->getBasePtr();
   12226   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
   12227     if (ST->isIndexed())
   12228       return false;
   12229     VT = ST->getMemoryVT();
   12230     if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
   12231         !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
   12232       return false;
   12233     Ptr = ST->getBasePtr();
   12234     isLoad = false;
   12235   } else {
   12236     return false;
   12237   }
   12238 
   12239   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
   12240   // out.  There is no reason to make this a preinc/predec.
   12241   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
   12242       Ptr.getNode()->hasOneUse())
   12243     return false;
   12244 
   12245   // Ask the target to do addressing mode selection.
   12246   SDValue BasePtr;
   12247   SDValue Offset;
   12248   ISD::MemIndexedMode AM = ISD::UNINDEXED;
   12249   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
   12250     return false;
   12251 
   12252   // Backends without true r+i pre-indexed forms may need to pass a
   12253   // constant base with a variable offset so that constant coercion
   12254   // will work with the patterns in canonical form.
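           // e.g., if the target returned BasePtr = (const 16) and Offset = %reg
           // for (add (const 16), %reg), treat %reg as the base while checking;
           // the operands are swapped back before the indexed node is built.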
   12255   bool Swapped = false;
   12256   if (isa<ConstantSDNode>(BasePtr)) {
   12257     std::swap(BasePtr, Offset);
   12258     Swapped = true;
   12259   }
   12260 
   12261   // Don't create an indexed load / store with zero offset.
   12262   if (isNullConstant(Offset))
   12263     return false;
   12264 
   12265   // Try turning it into a pre-indexed load / store except when:
   12266   // 1) The new base ptr is a frame index.
   12267   // 2) If N is a store and the new base ptr is either the same as or is a
   12268   //    predecessor of the value being stored.
   12269   // 3) Another use of the old base ptr is a predecessor of N. If ptr is
   12270   //    folded, that would create a cycle.
   12271   // 4) All uses are load / store ops that use it as the old base ptr.
   12272 
   12273   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
   12274   // (plus the implicit offset) to a register to preinc anyway.
   12275   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
   12276     return false;
   12277 
   12278   // Check #2.
   12279   if (!isLoad) {
   12280     SDValue Val = cast<StoreSDNode>(N)->getValue();
   12281     if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
   12282       return false;
   12283   }
   12284 
   12285   // Caches for hasPredecessorHelper.
   12286   SmallPtrSet<const SDNode *, 32> Visited;
   12287   SmallVector<const SDNode *, 16> Worklist;
   12288   Worklist.push_back(N);
   12289 
   12290   // If the offset is a constant, there may be other adds of constants that
   12291   // can be folded with this one. We should do this to avoid having to keep
   12292   // a copy of the original base pointer.
   12293   SmallVector<SDNode *, 16> OtherUses;
   12294   if (isa<ConstantSDNode>(Offset))
   12295     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
   12296                               UE = BasePtr.getNode()->use_end();
   12297          UI != UE; ++UI) {
   12298       SDUse &Use = UI.getUse();
   12299       // Skip the use that is Ptr and uses of other results from BasePtr's
   12300       // node (important for nodes that return multiple results).
   12301       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
   12302         continue;
   12303 
   12304       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
   12305         continue;
   12306 
   12307       if (Use.getUser()->getOpcode() != ISD::ADD &&
   12308           Use.getUser()->getOpcode() != ISD::SUB) {
   12309         OtherUses.clear();
   12310         break;
   12311       }
   12312 
   12313       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
   12314       if (!isa<ConstantSDNode>(Op1)) {
   12315         OtherUses.clear();
   12316         break;
   12317       }
   12318 
   12319       // FIXME: In some cases, we can be smarter about this.
   12320       if (Op1.getValueType() != Offset.getValueType()) {
   12321         OtherUses.clear();
   12322         break;
   12323       }
   12324 
   12325       OtherUses.push_back(Use.getUser());
   12326     }
   12327 
   12328   if (Swapped)
   12329     std::swap(BasePtr, Offset);
   12330 
   12331   // Now check for #3 and #4.
   12332   bool RealUse = false;
   12333 
   12334   for (SDNode *Use : Ptr.getNode()->uses()) {
   12335     if (Use == N)
   12336       continue;
   12337     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
   12338       return false;
   12339 
   12340     // If Ptr may be folded into the addressing mode of another use, then
   12341     // it's not profitable to do this transformation.
   12342     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
   12343       RealUse = true;
   12344   }
   12345 
   12346   if (!RealUse)
   12347     return false;
   12348 
   12349   SDValue Result;
   12350   if (isLoad)
   12351     Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
   12352                                 BasePtr, Offset, AM);
   12353   else
   12354     Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
   12355                                  BasePtr, Offset, AM);
   12356   ++PreIndexedNodes;
   12357   ++NodesCombined;
   12358   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
   12359              Result.getNode()->dump(&DAG); dbgs() << '\n');
   12360   WorklistRemover DeadNodes(*this);
   12361   if (isLoad) {
   12362     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
   12363     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
   12364   } else {
   12365     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
   12366   }
   12367 
   12368   // Finally, since the node is now dead, remove it from the graph.
   12369   deleteAndRecombine(N);
   12370 
   12371   if (Swapped)
   12372     std::swap(BasePtr, Offset);
   12373 
   12374   // Replace other uses of BasePtr that can be updated to use Ptr
   12375   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
   12376     unsigned OffsetIdx = 1;
   12377     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
   12378       OffsetIdx = 0;
   12379     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
   12380            BasePtr.getNode() && "Expected BasePtr operand");
   12381 
   12382     // We need to replace ptr0 in the following expression:
   12383     //   x0 * offset0 + y0 * ptr0 = t0
   12384     // knowing that
   12385     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
   12386     //
   12387     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
   12388     // indexed load/store and the expression that needs to be re-written.
   12389     //
   12390     // Therefore, we have:
   12391     //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
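             //
             //   For example, if the indexed store computed t1 = ptr0 - 4
             //   (x1 = -1, y1 = 1, offset1 = 4) and the other use is
             //   t0 = ptr0 + 10 (x0 = 1, y0 = 1, offset0 = 10), then
             //   t0 = (10 + 4) + t1 = t1 + 14.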
   12392 
   12393     ConstantSDNode *CN =
   12394       cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
   12395     int X0, X1, Y0, Y1;
   12396     const APInt &Offset0 = CN->getAPIntValue();
   12397     APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
   12398 
   12399     X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
   12400     Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
   12401     X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
   12402     Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
   12403 
   12404     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
   12405 
   12406     APInt CNV = Offset0;
   12407     if (X0 < 0) CNV = -CNV;
   12408     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
   12409     else CNV = CNV - Offset1;
   12410 
   12411     SDLoc DL(OtherUses[i]);
   12412 
   12413     // We can now generate the new expression.
   12414     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
   12415     SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
   12416 
   12417     SDValue NewUse = DAG.getNode(Opcode,
   12418                                  DL,
   12419                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
   12420     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
   12421     deleteAndRecombine(OtherUses[i]);
   12422   }
   12423 
   12424   // Replace the uses of Ptr with uses of the updated base value.
   12425   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
   12426   deleteAndRecombine(Ptr.getNode());
   12427   AddToWorklist(Result.getNode());
   12428 
   12429   return true;
   12430 }
   12431 
   12432 /// Try to combine a load/store with an add/sub of the base pointer node into
   12433 /// a post-indexed load/store. The transformation effectively folds the
   12434 /// add/subtract into the new indexed load/store and redirects all uses of the
   12435 /// add/subtract to the new load/store.
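         ///
         /// For example (when the target supports post-indexing), a load of %p
         /// followed by (add %p, 4) can become a post-incremented load that also
         /// produces the updated pointer, with uses of the add redirected to it.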
   12436 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
   12437   if (Level < AfterLegalizeDAG)
   12438     return false;
   12439 
   12440   bool isLoad = true;
   12441   SDValue Ptr;
   12442   EVT VT;
   12443   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
   12444     if (LD->isIndexed())
   12445       return false;
   12446     VT = LD->getMemoryVT();
   12447     if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
   12448         !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
   12449       return false;
   12450     Ptr = LD->getBasePtr();
   12451   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
   12452     if (ST->isIndexed())
   12453       return false;
   12454     VT = ST->getMemoryVT();
   12455     if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
   12456         !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
   12457       return false;
   12458     Ptr = ST->getBasePtr();
   12459     isLoad = false;
   12460   } else {
   12461     return false;
   12462   }
   12463 
   12464   if (Ptr.getNode()->hasOneUse())
   12465     return false;
   12466 
   12467   for (SDNode *Op : Ptr.getNode()->uses()) {
   12468     if (Op == N ||
   12469         (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
   12470       continue;
   12471 
   12472     SDValue BasePtr;
   12473     SDValue Offset;
   12474     ISD::MemIndexedMode AM = ISD::UNINDEXED;
   12475     if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
   12476       // Don't create an indexed load / store with zero offset.
   12477       if (isNullConstant(Offset))
   12478         continue;
   12479 
   12480       // Try turning it into a post-indexed load / store except when
   12481       // 1) All uses are load / store ops that use it as the base ptr (and
   12482       //    it may be folded into the addressing mode).
   12483       // 2) Op must be independent of N, i.e. Op is neither a predecessor
   12484       //    nor a successor of N. Otherwise, if Op is folded that would
   12485       //    create a cycle.
   12486 
   12487       if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
   12488         continue;
   12489 
   12490       // Check for #1.
   12491       bool TryNext = false;
   12492       for (SDNode *Use : BasePtr.getNode()->uses()) {
   12493         if (Use == Ptr.getNode())
   12494           continue;
   12495 
   12496         // If all the uses are load / store addresses, then don't do the
   12497         // transformation.
   12498         if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
   12499           bool RealUse = false;
   12500           for (SDNode *UseUse : Use->uses()) {
   12501             if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
   12502               RealUse = true;
   12503           }
   12504 
   12505           if (!RealUse) {
   12506             TryNext = true;
   12507             break;
   12508           }
   12509         }
   12510       }
   12511 
   12512       if (TryNext)
   12513         continue;
   12514 
   12515       // Check for #2
   12516       if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
   12517         SDValue Result = isLoad
   12518           ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
   12519                                BasePtr, Offset, AM)
   12520           : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
   12521                                 BasePtr, Offset, AM);
   12522         ++PostIndexedNodes;
   12523         ++NodesCombined;
   12524         LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
   12525                    dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
   12526                    dbgs() << '\n');
   12527         WorklistRemover DeadNodes(*this);
   12528         if (isLoad) {
   12529           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
   12530           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
   12531         } else {
   12532           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
   12533         }
   12534 
   12535         // Finally, since the node is now dead, remove it from the graph.
   12536         deleteAndRecombine(N);
   12537 
   12538         // Replace the uses of Use with uses of the updated base value.
   12539         DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
   12540                                       Result.getValue(isLoad ? 1 : 0));
   12541         deleteAndRecombine(Op);
   12542         return true;
   12543       }
   12544     }
   12545   }
   12546 
   12547   return false;
   12548 }
   12549 
   12550 /// Return the base-pointer arithmetic from an indexed \p LD.
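         /// That is, (add BasePtr, Increment) for pre/post-increment and
         /// (sub BasePtr, Increment) for pre/post-decrement.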
   12551 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
   12552   ISD::MemIndexedMode AM = LD->getAddressingMode();
   12553   assert(AM != ISD::UNINDEXED);
   12554   SDValue BP = LD->getOperand(1);
   12555   SDValue Inc = LD->getOperand(2);
   12556 
   12557   // Some backends use TargetConstants for load offsets, but don't expect
   12558   // TargetConstants in general ADD nodes. We can convert these constants into
   12559   // regular Constants (if the constant is not opaque).
   12560   assert((Inc.getOpcode() != ISD::TargetConstant ||
   12561           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
   12562          "Cannot split out indexing using opaque target constants");
   12563   if (Inc.getOpcode() == ISD::TargetConstant) {
   12564     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
   12565     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
   12566                           ConstInc->getValueType(0));
   12567   }
   12568 
   12569   unsigned Opc =
   12570       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
   12571   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
   12572 }
   12573 
   12574 SDValue DAGCombiner::visitLOAD(SDNode *N) {
   12575   LoadSDNode *LD  = cast<LoadSDNode>(N);
   12576   SDValue Chain = LD->getChain();
   12577   SDValue Ptr   = LD->getBasePtr();
   12578 
   12579   // If load is not volatile and there are no uses of the loaded value (and
   12580   // the updated indexed value in case of indexed loads), change uses of the
   12581   // chain value into uses of the chain input (i.e. delete the dead load).
   12582   if (!LD->isVolatile()) {
   12583     if (N->getValueType(1) == MVT::Other) {
   12584       // Unindexed loads.
   12585       if (!N->hasAnyUseOfValue(0)) {
   12586         // It's not safe to use the two value CombineTo variant here. e.g.
   12587         // v1, chain2 = load chain1, loc
   12588         // v2, chain3 = load chain2, loc
   12589         // v3         = add v2, c
   12590         // Now we replace use of chain2 with chain1.  This makes the second load
   12591         // isomorphic to the one we are deleting, and thus makes this load live.
   12592         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
   12593                    dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
   12594                    dbgs() << "\n");
   12595         WorklistRemover DeadNodes(*this);
   12596         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
   12597         AddUsersToWorklist(Chain.getNode());
   12598         if (N->use_empty())
   12599           deleteAndRecombine(N);
   12600 
   12601         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   12602       }
   12603     } else {
   12604       // Indexed loads.
   12605       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
   12606 
   12607       // If this load has an opaque TargetConstant offset, then we cannot split
   12608       // the indexing into an add/sub directly (that TargetConstant may not be
   12609       // valid for a different type of node, and we cannot convert an opaque
   12610       // target constant into a regular constant).
   12611       bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
   12612                        cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
   12613 
   12614       if (!N->hasAnyUseOfValue(0) &&
   12615           ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
   12616         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
   12617         SDValue Index;
   12618         if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
   12619           Index = SplitIndexingFromLoad(LD);
   12620           // Try to fold the base pointer arithmetic into subsequent loads and
   12621           // stores.
   12622           AddUsersToWorklist(N);
   12623         } else
   12624           Index = DAG.getUNDEF(N->getValueType(1));
   12625         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
   12626                    dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
   12627                    dbgs() << " and 2 other values\n");
   12628         WorklistRemover DeadNodes(*this);
   12629         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
   12630         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
   12631         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
   12632         deleteAndRecombine(N);
   12633         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   12634       }
   12635     }
   12636   }
   12637 
   12638   // If this load is directly stored, replace the load value with the stored
   12639   // value.
   12640   // TODO: Handle store large -> read small portion.
   12641   // TODO: Handle TRUNCSTORE/LOADEXT
   12642   if (OptLevel != CodeGenOpt::None &&
   12643       ISD::isNormalLoad(N) && !LD->isVolatile()) {
   12644     if (ISD::isNON_TRUNCStore(Chain.getNode())) {
   12645       StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
   12646       if (PrevST->getBasePtr() == Ptr &&
   12647           PrevST->getValue().getValueType() == N->getValueType(0))
   12648         return CombineTo(N, PrevST->getOperand(1), Chain);
   12649     }
   12650   }
   12651 
   12652   // Try to infer better alignment information than the load already has.
   12653   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
   12654     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
   12655       if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
   12656         SDValue NewLoad = DAG.getExtLoad(
   12657             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
   12658             LD->getPointerInfo(), LD->getMemoryVT(), Align,
   12659             LD->getMemOperand()->getFlags(), LD->getAAInfo());
   12660         // NewLoad will always be N as we are only refining the alignment
   12661         assert(NewLoad.getNode() == N);
   12662         (void)NewLoad;
   12663       }
   12664     }
   12665   }
   12666 
   12667   if (LD->isUnindexed()) {
   12668     // Walk up chain skipping non-aliasing memory nodes.
   12669     SDValue BetterChain = FindBetterChain(N, Chain);
   12670 
   12671     // If there is a better chain.
   12672     if (Chain != BetterChain) {
   12673       SDValue ReplLoad;
   12674 
   12675       // Replace the chain to avoid the dependency.
   12676       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
   12677         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
   12678                                BetterChain, Ptr, LD->getMemOperand());
   12679       } else {
   12680         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
   12681                                   LD->getValueType(0),
   12682                                   BetterChain, Ptr, LD->getMemoryVT(),
   12683                                   LD->getMemOperand());
   12684       }
   12685 
   12686       // Create token factor to keep old chain connected.
   12687       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
   12688                                   MVT::Other, Chain, ReplLoad.getValue(1));
   12689 
   12690       // Replace uses with load result and token factor
   12691       return CombineTo(N, ReplLoad.getValue(0), Token);
   12692     }
   12693   }
   12694 
   12695   // Try transforming N to an indexed load.
   12696   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
   12697     return SDValue(N, 0);
   12698 
   12699   // Try to slice up N into more direct loads if the slices are mapped to
   12700   // different register banks or pairing can take place.
   12701   if (SliceUpLoad(N))
   12702     return SDValue(N, 0);
   12703 
   12704   return SDValue();
   12705 }
   12706 
   12707 namespace {
   12708 
   12709 /// Helper structure used to slice a load in smaller loads.
   12710 /// Helper structure used to slice a load into smaller loads.
   12711 /// Origin = load Ty1, Base
   12712 /// Shift = srl Ty1 Origin, CstTy Amount
   12713 /// Inst = trunc Shift to Ty2
   12714 ///
   12715 /// Then, it will be rewritten into:
   12716 /// Slice = load SliceTy, Base + SliceOffset
   12717 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
   12718 ///
   12719 /// SliceTy is deduced from the number of bits that are actually used to
   12720 /// build Inst.
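         ///
         /// For example (little endian), Inst = trunc (srl (load i32 %p), 16) to i8
         /// is rewritten as a single i8 load from %p + 2.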
   12721 struct LoadedSlice {
   12722   /// Helper structure used to compute the cost of a slice.
   12723   struct Cost {
   12724     /// Are we optimizing for code size.
   12725     bool ForCodeSize;
   12726 
   12727     /// Various costs.
   12728     unsigned Loads = 0;
   12729     unsigned Truncates = 0;
   12730     unsigned CrossRegisterBanksCopies = 0;
   12731     unsigned ZExts = 0;
   12732     unsigned Shift = 0;
   12733 
   12734     Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
   12735 
   12736     /// Get the cost of one isolated slice.
   12737     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
   12738         : ForCodeSize(ForCodeSize), Loads(1) {
   12739       EVT TruncType = LS.Inst->getValueType(0);
   12740       EVT LoadedType = LS.getLoadedType();
   12741       if (TruncType != LoadedType &&
   12742           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
   12743         ZExts = 1;
   12744     }
   12745 
   12746     /// Account for slicing gain in the current cost.
   12747     /// Slicing provides a few gains, like removing a shift or a
   12748     /// truncate. This method allows the cost of the original load to
   12749     /// grow by the gain from this slice.
   12750     void addSliceGain(const LoadedSlice &LS) {
   12751       // Each slice saves a truncate.
   12752       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
   12753       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
   12754                               LS.Inst->getValueType(0)))
   12755         ++Truncates;
   12756       // If there is a shift amount, this slice gets rid of it.
   12757       if (LS.Shift)
   12758         ++Shift;
   12759       // If this slice can merge a cross register bank copy, account for it.
   12760       if (LS.canMergeExpensiveCrossRegisterBankCopy())
   12761         ++CrossRegisterBanksCopies;
   12762     }
   12763 
   12764     Cost &operator+=(const Cost &RHS) {
   12765       Loads += RHS.Loads;
   12766       Truncates += RHS.Truncates;
   12767       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
   12768       ZExts += RHS.ZExts;
   12769       Shift += RHS.Shift;
   12770       return *this;
   12771     }
   12772 
   12773     bool operator==(const Cost &RHS) const {
   12774       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
   12775              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
   12776              ZExts == RHS.ZExts && Shift == RHS.Shift;
   12777     }
   12778 
   12779     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
   12780 
   12781     bool operator<(const Cost &RHS) const {
   12782       // Assume cross register banks copies are as expensive as loads.
   12783       // FIXME: Do we want some more target hooks?
   12784       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
   12785       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
   12786       // Unless we are optimizing for code size, consider the
   12787       // expensive operation first.
   12788       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
   12789         return ExpensiveOpsLHS < ExpensiveOpsRHS;
   12790       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
   12791              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
   12792     }
   12793 
   12794     bool operator>(const Cost &RHS) const { return RHS < *this; }
   12795 
   12796     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
   12797 
   12798     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
   12799   };
   12800 
   12801   // The last instruction that represents the slice. This should be a
   12802   // truncate instruction.
   12803   SDNode *Inst;
   12804 
   12805   // The original load instruction.
   12806   LoadSDNode *Origin;
   12807 
   12808   // The right shift amount in bits from the original load.
   12809   unsigned Shift;
   12810 
   12811   // The DAG from which Origin comes.
   12812   // This is used to get some contextual information about legal types, etc.
   12813   SelectionDAG *DAG;
   12814 
   12815   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
   12816               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
   12817       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
   12818 
   12819   /// Get the bits used in a chunk of bits \p BitWidth large.
   12820   /// \return Result is \p BitWidth bits wide, with used bits set to 1
   12821   ///         and unused bits set to 0.
   12822   APInt getUsedBits() const {
   12823     // Reproduce the trunc(lshr) sequence:
   12824     // - Start from the truncated value.
   12825     // - Zero extend to the desired bit width.
   12826     // - Shift left.
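             // e.g., a trunc to i8 of an i32 load with Shift == 16 gives
             // UsedBits == 0x00ff0000.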
   12827     assert(Origin && "No original load to compare against.");
   12828     unsigned BitWidth = Origin->getValueSizeInBits(0);
   12829     assert(Inst && "This slice is not bound to an instruction");
   12830     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
   12831            "Extracted slice is bigger than the whole type!");
   12832     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
   12833     UsedBits.setAllBits();
   12834     UsedBits = UsedBits.zext(BitWidth);
   12835     UsedBits <<= Shift;
   12836     return UsedBits;
   12837   }
   12838 
   12839   /// Get the size of the slice to be loaded in bytes.
   12840   unsigned getLoadedSize() const {
   12841     unsigned SliceSize = getUsedBits().countPopulation();
   12842     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
   12843     return SliceSize / 8;
   12844   }
   12845 
   12846   /// Get the type that will be loaded for this slice.
   12847   /// Note: This may not be the final type for the slice.
   12848   EVT getLoadedType() const {
   12849     assert(DAG && "Missing context");
   12850     LLVMContext &Ctxt = *DAG->getContext();
   12851     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
   12852   }
   12853 
   12854   /// Get the alignment of the load used for this slice.
   12855   unsigned getAlignment() const {
   12856     unsigned Alignment = Origin->getAlignment();
   12857     unsigned Offset = getOffsetFromBase();
   12858     if (Offset != 0)
   12859       Alignment = MinAlign(Alignment, Alignment + Offset);
   12860     return Alignment;
   12861   }
   12862 
   12863   /// Check if this slice can be rewritten with legal operations.
   12864   bool isLegal() const {
   12865     // An invalid slice is not legal.
   12866     if (!Origin || !Inst || !DAG)
   12867       return false;
   12868 
   12869     // Offsets are for indexed loads only; we do not handle that.
   12870     if (!Origin->getOffset().isUndef())
   12871       return false;
   12872 
   12873     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
   12874 
   12875     // Check that the type is legal.
   12876     EVT SliceType = getLoadedType();
   12877     if (!TLI.isTypeLegal(SliceType))
   12878       return false;
   12879 
   12880     // Check that the load is legal for this type.
   12881     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
   12882       return false;
   12883 
   12884     // Check that the offset can be computed.
   12885     // 1. Check its type.
   12886     EVT PtrType = Origin->getBasePtr().getValueType();
   12887     if (PtrType == MVT::Untyped || PtrType.isExtended())
   12888       return false;
   12889 
   12890     // 2. Check that it fits in the immediate.
   12891     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
   12892       return false;
   12893 
   12894     // 3. Check that the computation is legal.
   12895     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
   12896       return false;
   12897 
   12898     // Check that the zext is legal if it needs one.
   12899     EVT TruncateType = Inst->getValueType(0);
   12900     if (TruncateType != SliceType &&
   12901         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
   12902       return false;
   12903 
   12904     return true;
   12905   }
   12906 
   12907   /// Get the offset in bytes of this slice in the original chunk of
   12908   /// bits.
   12909   /// \pre DAG != nullptr.
   12910   uint64_t getOffsetFromBase() const {
   12911     assert(DAG && "Missing context.");
   12912     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
   12913     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
   12914     uint64_t Offset = Shift / 8;
   12915     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
   12916     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
   12917            "The size of the original loaded type is not a multiple of a"
   12918            " byte.");
   12919     // If Offset is bigger than TySizeInBytes, it means we are loading all
   12920     // zeros. This should have been optimized away earlier in the process.
   12921     assert(TySizeInBytes > Offset &&
   12922            "Invalid shift amount for given loaded size");
   12923     if (IsBigEndian)
   12924       Offset = TySizeInBytes - Offset - getLoadedSize();
   12925     return Offset;
   12926   }
   12927 
   12928   /// Generate the sequence of instructions to load the slice
   12929   /// represented by this object and redirect the uses of this slice to
   12930   /// this new sequence of instructions.
   12931   /// \pre this->Inst && this->Origin are valid Instructions and this
   12932   /// object passed the legal check: LoadedSlice::isLegal returned true.
   12933   /// \return The last instruction of the sequence used to load the slice.
   12934   SDValue loadSlice() const {
   12935     assert(Inst && Origin && "Unable to replace a non-existing slice.");
   12936     const SDValue &OldBaseAddr = Origin->getBasePtr();
   12937     SDValue BaseAddr = OldBaseAddr;
   12938     // Get the offset in that chunk of bytes w.r.t. the endianness.
   12939     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
   12940     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
   12941     if (Offset) {
   12942       // BaseAddr = BaseAddr + Offset.
   12943       EVT ArithType = BaseAddr.getValueType();
   12944       SDLoc DL(Origin);
   12945       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
   12946                               DAG->getConstant(Offset, DL, ArithType));
   12947     }
   12948 
   12949     // Create the type of the loaded slice according to its size.
   12950     EVT SliceType = getLoadedType();
   12951 
   12952     // Create the load for the slice.
   12953     SDValue LastInst =
   12954         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
   12955                      Origin->getPointerInfo().getWithOffset(Offset),
   12956                      getAlignment(), Origin->getMemOperand()->getFlags());
   12957     // If the final type is not the same as the loaded type, this means that
   12958     // we have to pad with zero. Create a zero extend for that.
   12959     EVT FinalType = Inst->getValueType(0);
   12960     if (SliceType != FinalType)
   12961       LastInst =
   12962           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
   12963     return LastInst;
   12964   }
   12965 
   12966   /// Check if this slice can be merged with an expensive cross register
   12967   /// bank copy. E.g.,
   12968   /// i = load i32
   12969   /// f = bitcast i32 i to float
   12970   bool canMergeExpensiveCrossRegisterBankCopy() const {
   12971     if (!Inst || !Inst->hasOneUse())
   12972       return false;
   12973     SDNode *Use = *Inst->use_begin();
   12974     if (Use->getOpcode() != ISD::BITCAST)
   12975       return false;
   12976     assert(DAG && "Missing context");
   12977     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
   12978     EVT ResVT = Use->getValueType(0);
   12979     const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
   12980     const TargetRegisterClass *ArgRC =
   12981         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
   12982     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
   12983       return false;
   12984 
   12985     // At this point, we know that we perform a cross-register-bank copy.
   12986     // Check if it is expensive.
   12987     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
   12988     // Assume the bitcast is cheap when the two register classes share a
   12989     // common subclass; without one, treat it as expensive.
   12990     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
   12991       return false;
   12992 
   12993     // Check if it will be merged with the load.
   12994     // 1. Check the alignment constraint.
   12995     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
   12996         ResVT.getTypeForEVT(*DAG->getContext()));
   12997 
   12998     if (RequiredAlignment > getAlignment())
   12999       return false;
   13000 
   13001     // 2. Check that the load is a legal operation for that type.
   13002     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
   13003       return false;
   13004 
   13005     // 3. Check that we do not have a zext in the way.
   13006     if (Inst->getValueType(0) != getLoadedType())
   13007       return false;
   13008 
   13009     return true;
   13010   }
   13011 };
   13012 
   13013 } // end anonymous namespace
   13014 
   13015 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
   13016 /// \p UsedBits looks like 0..0 1..1 0..0.
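         /// For example, 0x00ff0000 is dense, whereas 0x00ff00ff is not.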
   13017 static bool areUsedBitsDense(const APInt &UsedBits) {
   13018   // If all the bits are one, this is dense!
   13019   if (UsedBits.isAllOnesValue())
   13020     return true;
   13021 
   13022   // Get rid of the unused bits on the right.
   13023   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
   13024   // Get rid of the unused bits on the left.
   13025   if (NarrowedUsedBits.countLeadingZeros())
   13026     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
   13027   // Check that the chunk of bits is completely used.
   13028   return NarrowedUsedBits.isAllOnesValue();
   13029 }
   13030 
   13031 /// Check whether or not \p First and \p Second are next to each other
   13032 /// in memory. This means that there is no hole between the bits loaded
   13033 /// by \p First and the bits loaded by \p Second.
   13034 static bool areSlicesNextToEachOther(const LoadedSlice &First,
   13035                                      const LoadedSlice &Second) {
   13036   assert(First.Origin == Second.Origin && First.Origin &&
   13037          "Unable to match different memory origins.");
   13038   APInt UsedBits = First.getUsedBits();
   13039   assert((UsedBits & Second.getUsedBits()) == 0 &&
   13040          "Slices are not supposed to overlap.");
   13041   UsedBits |= Second.getUsedBits();
   13042   return areUsedBitsDense(UsedBits);
   13043 }
   13044 
   13045 /// Adjust the \p GlobalLSCost according to the target
   13046 /// pairing capabilities and the layout of the slices.
   13047 /// \pre \p GlobalLSCost should account for at least as many loads as
   13048 /// there is in the slices in \p LoadedSlices.
   13049 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
   13050                                  LoadedSlice::Cost &GlobalLSCost) {
   13051   unsigned NumberOfSlices = LoadedSlices.size();
   13052   // If there are fewer than 2 elements, no pairing is possible.
   13053   if (NumberOfSlices < 2)
   13054     return;
   13055 
   13056   // Sort the slices so that elements that are likely to be next to each
   13057   // other in memory are next to each other in the list.
   13058   llvm::sort(LoadedSlices.begin(), LoadedSlices.end(),
   13059              [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
   13060     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
   13061     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
   13062   });
   13063   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
   13064   // First (resp. Second) is the first (resp. second) potential candidate
   13065   // to be placed in a paired load.
   13066   const LoadedSlice *First = nullptr;
   13067   const LoadedSlice *Second = nullptr;
   13068   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
   13069                 // Set the beginning of the pair.
   13070                                                            First = Second) {
   13071     Second = &LoadedSlices[CurrSlice];
   13072 
   13073     // If First is NULL, it means we start a new pair.
   13074     // Get to the next slice.
   13075     if (!First)
   13076       continue;
   13077 
   13078     EVT LoadedType = First->getLoadedType();
   13079 
   13080     // If the types of the slices are different, we cannot pair them.
   13081     if (LoadedType != Second->getLoadedType())
   13082       continue;
   13083 
   13084     // Check if the target supplies paired loads for this type.
   13085     unsigned RequiredAlignment = 0;
   13086     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
   13087       // Move to the next pair; this type is hopeless.
   13088       Second = nullptr;
   13089       continue;
   13090     }
   13091     // Check if we meet the alignment requirement.
   13092     if (RequiredAlignment > First->getAlignment())
   13093       continue;
   13094 
   13095     // Check that both loads are next to each other in memory.
   13096     if (!areSlicesNextToEachOther(*First, *Second))
   13097       continue;
   13098 
   13099     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
   13100     --GlobalLSCost.Loads;
   13101     // Move to the next pair.
   13102     Second = nullptr;
   13103   }
   13104 }
   13105 
   13106 /// Check the profitability of all involved LoadedSlices.
   13107 /// Currently, it is considered profitable if there are exactly two
   13108 /// involved slices (1) which are (2) next to each other in memory, and
   13109 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load's (3).
   13110 ///
   13111 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
   13112 /// the elements themselves.
   13113 ///
   13114 /// FIXME: When the cost model will be mature enough, we can relax
   13115 /// constraints (1) and (2).
   13116 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
   13117                                 const APInt &UsedBits, bool ForCodeSize) {
   13118   unsigned NumberOfSlices = LoadedSlices.size();
   13119   if (StressLoadSlicing)
   13120     return NumberOfSlices > 1;
   13121 
   13122   // Check (1).
   13123   if (NumberOfSlices != 2)
   13124     return false;
   13125 
   13126   // Check (2).
   13127   if (!areUsedBitsDense(UsedBits))
   13128     return false;
   13129 
   13130   // Check (3).
   13131   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
   13132   // The original code has one big load.
   13133   OrigCost.Loads = 1;
   13134   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
   13135     const LoadedSlice &LS = LoadedSlices[CurrSlice];
   13136     // Accumulate the cost of all the slices.
   13137     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
   13138     GlobalSlicingCost += SliceCost;
   13139 
   13140     // Account in the original configuration's cost for the gain obtained
   13141     // with the current slices.
   13142     OrigCost.addSliceGain(LS);
   13143   }
   13144 
   13145   // If the target supports paired load, adjust the cost accordingly.
   13146   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
   13147   return OrigCost > GlobalSlicingCost;
   13148 }
   13149 
   13150 /// If the given load, \p N, is used only by trunc or trunc(lshr)
   13151 /// operations, split it in the various pieces being extracted.
   13152 ///
   13153 /// This sort of thing is introduced by SROA.
   13154 /// This slicing takes care not to insert overlapping loads.
   13155 /// \pre \p N is a simple load (i.e., not an atomic or volatile load).
   13156 bool DAGCombiner::SliceUpLoad(SDNode *N) {
   13157   if (Level < AfterLegalizeDAG)
   13158     return false;
   13159 
   13160   LoadSDNode *LD = cast<LoadSDNode>(N);
   13161   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
   13162       !LD->getValueType(0).isInteger())
   13163     return false;
   13164 
   13165   // Keep track of already used bits to detect overlapping values.
   13166   // In that case, we will just abort the transformation.
   13167   APInt UsedBits(LD->getValueSizeInBits(0), 0);
   13168 
   13169   SmallVector<LoadedSlice, 4> LoadedSlices;
   13170 
   13171   // Check if this load is used as several smaller chunks of bits.
   13172   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
   13173   // of computation for each trunc.
   13174   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
   13175        UI != UIEnd; ++UI) {
   13176     // Skip the uses of the chain.
   13177     if (UI.getUse().getResNo() != 0)
   13178       continue;
   13179 
   13180     SDNode *User = *UI;
   13181     unsigned Shift = 0;
   13182 
   13183     // Check if this is a trunc(lshr).
   13184     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
   13185         isa<ConstantSDNode>(User->getOperand(1))) {
   13186       Shift = User->getConstantOperandVal(1);
   13187       User = *User->use_begin();
   13188     }
   13189 
   13190     // At this point, User is a truncate iff we encountered trunc or
   13191     // trunc(lshr).
   13192     if (User->getOpcode() != ISD::TRUNCATE)
   13193       return false;
   13194 
   13195     // The width of the type must be a power of 2 and at least 8 bits.
   13196     // Otherwise the load cannot be represented in LLVM IR.
   13197     // Moreover, if the shift is not a multiple of 8 bits, the slice
   13198     // will span several bytes. We do not support that.
   13199     unsigned Width = User->getValueSizeInBits(0);
   13200     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
   13201       return false;
   13202 
   13203     // Build the slice for this chain of computations.
   13204     LoadedSlice LS(User, LD, Shift, &DAG);
   13205     APInt CurrentUsedBits = LS.getUsedBits();
   13206 
   13207     // Check if this slice overlaps with another.
   13208     if ((CurrentUsedBits & UsedBits) != 0)
   13209       return false;
   13210     // Update the bits used globally.
   13211     UsedBits |= CurrentUsedBits;
   13212 
   13213     // Check if the new slice would be legal.
   13214     if (!LS.isLegal())
   13215       return false;
   13216 
   13217     // Record the slice.
   13218     LoadedSlices.push_back(LS);
   13219   }
   13220 
   13221   // Abort slicing if it does not seem to be profitable.
   13222   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
   13223     return false;
   13224 
   13225   ++SlicedLoads;
   13226 
   13227   // Rewrite each chain to use an independent load.
   13228   // By construction, each chain can be represented by a unique load.
   13229 
   13230   // Prepare the argument for the new token factor for all the slices.
   13231   SmallVector<SDValue, 8> ArgChains;
   13232   for (SmallVectorImpl<LoadedSlice>::const_iterator
   13233            LSIt = LoadedSlices.begin(),
   13234            LSItEnd = LoadedSlices.end();
   13235        LSIt != LSItEnd; ++LSIt) {
   13236     SDValue SliceInst = LSIt->loadSlice();
   13237     CombineTo(LSIt->Inst, SliceInst, true);
   13238     if (SliceInst.getOpcode() != ISD::LOAD)
   13239       SliceInst = SliceInst.getOperand(0);
   13240     assert(SliceInst->getOpcode() == ISD::LOAD &&
   13241            "It takes more than a zext to get to the loaded slice!!");
   13242     ArgChains.push_back(SliceInst.getValue(1));
   13243   }
   13244 
   13245   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
   13246                               ArgChains);
   13247   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
   13248   AddToWorklist(Chain.getNode());
   13249   return true;
   13250 }
   13251 
   13252 /// Check to see if V is (and (load ptr), imm), where the load has specific
   13253 /// bytes cleared out.  If so, return the byte size being masked out and the
   13254 /// shift amount.
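         /// For example, (and (load i32 %p), 0xffff00ff) clears byte 1 of the
         /// loaded value, so this returns {1, 1}: one byte masked out, at a
         /// one-byte shift.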
   13255 static std::pair<unsigned, unsigned>
   13256 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
   13257   std::pair<unsigned, unsigned> Result(0, 0);
   13258 
   13259   // Check for the structure we're looking for.
   13260   if (V->getOpcode() != ISD::AND ||
   13261       !isa<ConstantSDNode>(V->getOperand(1)) ||
   13262       !ISD::isNormalLoad(V->getOperand(0).getNode()))
   13263     return Result;
   13264 
   13265   // Check the chain and pointer.
   13266   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
   13267   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
   13268 
   13269   // This only handles simple types.
   13270   if (V.getValueType() != MVT::i16 &&
   13271       V.getValueType() != MVT::i32 &&
   13272       V.getValueType() != MVT::i64)
   13273     return Result;
   13274 
   13275   // Check the constant mask.  Invert it so that the bits being masked out are
   13276   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
   13277   // follow the sign bit for uniformity.
   13278   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
   13279   unsigned NotMaskLZ = countLeadingZeros(NotMask);
   13280   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
   13281   unsigned NotMaskTZ = countTrailingZeros(NotMask);
   13282   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
   13283   if (NotMaskLZ == 64) return Result;  // All zero mask.
   13284 
   13285   // See if we have a continuous run of bits.  If so, we have 0*1+0*
   13286   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
   13287     return Result;
   13288 
   13289   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
   13290   if (V.getValueType() != MVT::i64 && NotMaskLZ)
   13291     NotMaskLZ -= 64-V.getValueSizeInBits();
   13292 
   13293   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
   13294   switch (MaskedBytes) {
   13295   case 1:
   13296   case 2:
   13297   case 4: break;
   13298   default: return Result; // All one mask, or 5-byte mask.
   13299   }
   13300 
   13301   // Verify that the first bit starts at a multiple of the mask width so that
   13302   // the access is aligned the same as the access width.
   13303   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
   13304 
   13305   // For narrowing to be valid, the load must be the memory operation
   13306   // immediately preceding the store.
   13307   if (LD == Chain.getNode())
   13308     ; // ok.
   13309   else if (Chain->getOpcode() == ISD::TokenFactor &&
   13310            SDValue(LD, 1).hasOneUse()) {
   13311     // LD has only 1 chain use, so there are no indirect dependencies.
   13312     bool isOk = false;
   13313     for (const SDValue &ChainOp : Chain->op_values())
   13314       if (ChainOp.getNode() == LD) {
   13315         isOk = true;
   13316         break;
   13317       }
   13318     if (!isOk)
   13319       return Result;
   13320   } else
   13321     return Result; // Fail.
   13322 
   13323   Result.first = MaskedBytes;
   13324   Result.second = NotMaskTZ/8;
   13325   return Result;
   13326 }
   13327 
   13328 /// Check to see if IVal is something that provides a value as specified by
   13329 /// MaskInfo. If so, replace the specified store with a narrower store of
   13330 /// truncated IVal.
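         /// A sketch (assuming a little-endian target and MaskInfo == {1, 1} as in
         /// the example above):
         ///   store (or (and (load p), 0xFFFF00FF), IVal), p
         /// becomes, when IVal is known zero outside bits [8,16),
         ///   store i8 (trunc (srl IVal, 8)), p+1
         /// i.e. a one-byte store of the inserted byte at offset 1.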
   13331 static SDNode *
   13332 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
   13333                                 SDValue IVal, StoreSDNode *St,
   13334                                 DAGCombiner *DC) {
   13335   unsigned NumBytes = MaskInfo.first;
   13336   unsigned ByteShift = MaskInfo.second;
   13337   SelectionDAG &DAG = DC->getDAG();
   13338 
   13339   // Check to see if IVal is all zeros outside the part being masked in by
   13340   // the 'or' that uses this.  If not, this is not a replacement.
   13341   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
   13342                                   ByteShift*8, (ByteShift+NumBytes)*8);
   13343   if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
   13344 
   13345   // Check that it is legal on the target to do this.  It is legal if the new
   13346   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
   13347   // legalization.
   13348   MVT VT = MVT::getIntegerVT(NumBytes*8);
   13349   if (!DC->isTypeLegal(VT))
   13350     return nullptr;
   13351 
   13352   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
   13353   // shifted by ByteShift and truncated down to NumBytes.
   13354   if (ByteShift) {
   13355     SDLoc DL(IVal);
   13356     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
   13357                        DAG.getConstant(ByteShift*8, DL,
   13358                                     DC->getShiftAmountTy(IVal.getValueType())));
   13359   }
   13360 
   13361   // Figure out the offset for the store and the alignment of the access.
   13362   unsigned StOffset;
   13363   unsigned NewAlign = St->getAlignment();
   13364 
   13365   if (DAG.getDataLayout().isLittleEndian())
   13366     StOffset = ByteShift;
   13367   else
   13368     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
   13369 
   13370   SDValue Ptr = St->getBasePtr();
   13371   if (StOffset) {
   13372     SDLoc DL(IVal);
   13373     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
   13374                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
   13375     NewAlign = MinAlign(NewAlign, StOffset);
   13376   }
   13377 
   13378   // Truncate down to the new size.
   13379   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
   13380 
   13381   ++OpsNarrowed;
   13382   return DAG
   13383       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
   13384                 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
   13385       .getNode();
   13386 }
   13387 
   13388 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
   13389 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
   13390 /// narrowing the load and store if it would end up being a win for performance
   13391 /// or code size.
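         /// For example (illustrative constants; assumes i8 ops are legal and the
         /// target reports narrowing as profitable): on a little-endian target,
         ///   x = load i32 p; store (or x, 0x5500), p
         /// only changes bits [8,16) and can be narrowed to
         ///   y = load i8 (p+1); store (or y, 0x55), p+1
         /// which leaves the original wide load dead.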
   13392 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
   13393   StoreSDNode *ST  = cast<StoreSDNode>(N);
   13394   if (ST->isVolatile())
   13395     return SDValue();
   13396 
   13397   SDValue Chain = ST->getChain();
   13398   SDValue Value = ST->getValue();
   13399   SDValue Ptr   = ST->getBasePtr();
   13400   EVT VT = Value.getValueType();
   13401 
   13402   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
   13403     return SDValue();
   13404 
   13405   unsigned Opc = Value.getOpcode();
   13406 
   13407   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
   13408   // is a byte mask indicating a consecutive run of bytes, check to see if
   13409   // Y is known to provide just those bytes.  If so, we try to replace the
   13410   // load / or / store sequence with a single (narrower) store, which makes
   13411   // the load dead.
   13412   if (Opc == ISD::OR) {
   13413     std::pair<unsigned, unsigned> MaskedLoad;
   13414     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
   13415     if (MaskedLoad.first)
   13416       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
   13417                                                   Value.getOperand(1), ST,this))
   13418         return SDValue(NewST, 0);
   13419 
   13420     // Or is commutative, so try swapping X and Y.
   13421     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
   13422     if (MaskedLoad.first)
   13423       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
   13424                                                   Value.getOperand(0), ST,this))
   13425         return SDValue(NewST, 0);
   13426   }
   13427 
   13428   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
   13429       Value.getOperand(1).getOpcode() != ISD::Constant)
   13430     return SDValue();
   13431 
   13432   SDValue N0 = Value.getOperand(0);
   13433   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
   13434       Chain == SDValue(N0.getNode(), 1)) {
   13435     LoadSDNode *LD = cast<LoadSDNode>(N0);
   13436     if (LD->getBasePtr() != Ptr ||
   13437         LD->getPointerInfo().getAddrSpace() !=
   13438         ST->getPointerInfo().getAddrSpace())
   13439       return SDValue();
   13440 
   13441     // Find the type to which to narrow the load / op / store.
   13442     SDValue N1 = Value.getOperand(1);
   13443     unsigned BitWidth = N1.getValueSizeInBits();
   13444     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
   13445     if (Opc == ISD::AND)
   13446       Imm ^= APInt::getAllOnesValue(BitWidth);
   13447     if (Imm == 0 || Imm.isAllOnesValue())
   13448       return SDValue();
   13449     unsigned ShAmt = Imm.countTrailingZeros();
   13450     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
   13451     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
   13452     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
   13453     // The narrowing should be profitable, the load/store operation should be
   13454     // legal (or custom) and the store size should be equal to the NewVT width.
   13455     while (NewBW < BitWidth &&
   13456            (NewVT.getStoreSizeInBits() != NewBW ||
   13457             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
   13458             !TLI.isNarrowingProfitable(VT, NewVT))) {
   13459       NewBW = NextPowerOf2(NewBW);
   13460       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
   13461     }
   13462     if (NewBW >= BitWidth)
   13463       return SDValue();
   13464 
   13465     // If the lowest changed bit does not start at a NewBW-sized boundary,
   13466     // align ShAmt down to the previous boundary.
   13467     if (ShAmt % NewBW)
   13468       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
   13469     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
   13470                                    std::min(BitWidth, ShAmt + NewBW));
   13471     if ((Imm & Mask) == Imm) {
   13472       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
   13473       if (Opc == ISD::AND)
   13474         NewImm ^= APInt::getAllOnesValue(NewBW);
   13475       uint64_t PtrOff = ShAmt / 8;
   13476       // For big endian targets, we need to adjust the offset to the pointer to
   13477       // load the correct bytes.
   13478       if (DAG.getDataLayout().isBigEndian())
   13479         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
   13480 
   13481       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
   13482       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
   13483       if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
   13484         return SDValue();
   13485 
   13486       SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
   13487                                    Ptr.getValueType(), Ptr,
   13488                                    DAG.getConstant(PtrOff, SDLoc(LD),
   13489                                                    Ptr.getValueType()));
   13490       SDValue NewLD =
   13491           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
   13492                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
   13493                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
   13494       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
   13495                                    DAG.getConstant(NewImm, SDLoc(Value),
   13496                                                    NewVT));
   13497       SDValue NewST =
   13498           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
   13499                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
   13500 
   13501       AddToWorklist(NewPtr.getNode());
   13502       AddToWorklist(NewLD.getNode());
   13503       AddToWorklist(NewVal.getNode());
   13504       WorklistRemover DeadNodes(*this);
   13505       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
   13506       ++OpsNarrowed;
   13507       return NewST;
   13508     }
   13509   }
   13510 
   13511   return SDValue();
   13512 }
   13513 
   13514 /// For a given floating point load / store pair, if the load value isn't used
   13515 /// by any other operations, then consider transforming the pair to integer
   13516 /// load / store operations if the target deems the transformation profitable.
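         /// A sketch of the transformation (whether it fires is entirely up to the
         /// target hooks queried below):
         ///   f = load float, p;  store float f, q
         /// becomes
         ///   i = load i32, p;    store i32 i, q
         /// avoiding a round trip through the floating-point register file.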
   13517 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
   13518   StoreSDNode *ST  = cast<StoreSDNode>(N);
   13519   SDValue Chain = ST->getChain();
   13520   SDValue Value = ST->getValue();
   13521   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
   13522       Value.hasOneUse() &&
   13523       Chain == SDValue(Value.getNode(), 1)) {
   13524     LoadSDNode *LD = cast<LoadSDNode>(Value);
   13525     EVT VT = LD->getMemoryVT();
   13526     if (!VT.isFloatingPoint() ||
   13527         VT != ST->getMemoryVT() ||
   13528         LD->isNonTemporal() ||
   13529         ST->isNonTemporal() ||
   13530         LD->getPointerInfo().getAddrSpace() != 0 ||
   13531         ST->getPointerInfo().getAddrSpace() != 0)
   13532       return SDValue();
   13533 
   13534     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
   13535     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
   13536         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
   13537         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
   13538         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
   13539       return SDValue();
   13540 
   13541     unsigned LDAlign = LD->getAlignment();
   13542     unsigned STAlign = ST->getAlignment();
   13543     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
   13544     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
   13545     if (LDAlign < ABIAlign || STAlign < ABIAlign)
   13546       return SDValue();
   13547 
   13548     SDValue NewLD =
   13549         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
   13550                     LD->getPointerInfo(), LDAlign);
   13551 
   13552     SDValue NewST =
   13553         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
   13554                      ST->getPointerInfo(), STAlign);
   13555 
   13556     AddToWorklist(NewLD.getNode());
   13557     AddToWorklist(NewST.getNode());
   13558     WorklistRemover DeadNodes(*this);
   13559     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
   13560     ++LdStFP2Int;
   13561     return NewST;
   13562   }
   13563 
   13564   return SDValue();
   13565 }
   13566 
   13567 // This is a helper function for visitMUL to check the profitability
   13568 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
   13569 // MulNode is the original multiply, AddNode is (add x, c1),
   13570 // and ConstNode is c2.
   13571 //
   13572 // If the (add x, c1) has multiple uses, we could increase
   13573 // the number of adds if we make this transformation.
   13574 // It would only be worth doing this if we can remove a
   13575 // multiply in the process. Check for that here.
   13576 // To illustrate:
   13577 //     (A + c1) * c3
   13578 //     (A + c2) * c3
   13579 // We're checking for cases where we have common "c3 * A" expressions.
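         // With illustrative constants: given (A + 10) * 5 and (A + 20) * 5,
         // expanding both to (A * 5) + 50 and (A * 5) + 100 exposes a common
         // (A * 5), so removing one multiply pays for the extra adds.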
   13580 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
   13581                                               SDValue &AddNode,
   13582                                               SDValue &ConstNode) {
   13583   APInt Val;
   13584 
   13585   // If the add only has one use, this would be OK to do.
   13586   if (AddNode.getNode()->hasOneUse())
   13587     return true;
   13588 
   13589   // Walk all the users of the constant with which we're multiplying.
   13590   for (SDNode *Use : ConstNode->uses()) {
   13591     if (Use == MulNode) // This use is the one we're on right now. Skip it.
   13592       continue;
   13593 
   13594     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
   13595       SDNode *OtherOp;
   13596       SDNode *MulVar = AddNode.getOperand(0).getNode();
   13597 
   13598       // OtherOp is what we're multiplying against the constant.
   13599       if (Use->getOperand(0) == ConstNode)
   13600         OtherOp = Use->getOperand(1).getNode();
   13601       else
   13602         OtherOp = Use->getOperand(0).getNode();
   13603 
   13604       // Check to see if this multiply uses the same operand as our "add".
   13605       //
   13606       //     ConstNode  = CONST
   13607       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
   13608       //     ...
   13609       //     AddNode  = (A + c1)  <-- MulVar is A.
   13610       //         = AddNode * ConstNode   <-- current visiting instruction.
   13611       //
   13612       // If we make this transformation, we will have a common
   13613       // multiply (ConstNode * A) that we can save.
   13614       if (OtherOp == MulVar)
   13615         return true;
   13616 
   13617       // Now check to see if a future expansion will give us a common
   13618       // multiply.
   13619       //
   13620       //     ConstNode  = CONST
   13621       //     AddNode    = (A + c1)
   13622       //     ...   = AddNode * ConstNode <-- current visiting instruction.
   13623       //     ...
   13624       //     OtherOp = (A + c2)
   13625       //     Use     = OtherOp * ConstNode <-- visiting Use.
   13626       //
   13627       // If we make this transformation, we will have a common
   13628       // multiply (CONST * A) after we also do the same transformation
   13629 // to the "Use" instruction above.
   13630       if (OtherOp->getOpcode() == ISD::ADD &&
   13631           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
   13632           OtherOp->getOperand(0).getNode() == MulVar)
   13633         return true;
   13634     }
   13635   }
   13636 
   13637   // Didn't find a case where this would be profitable.
   13638   return false;
   13639 }
   13640 
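         /// Build the TokenFactor that will serve as the merged store's chain: take
         /// each candidate store's input chain, but skip chains that point at
         /// another candidate, since those are reached transitively.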
   13641 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
   13642                                          unsigned NumStores) {
   13643   SmallVector<SDValue, 8> Chains;
   13644   SmallPtrSet<const SDNode *, 8> Visited;
   13645   SDLoc StoreDL(StoreNodes[0].MemNode);
   13646 
   13647   for (unsigned i = 0; i < NumStores; ++i) {
   13648     Visited.insert(StoreNodes[i].MemNode);
   13649   }
   13650 
   13651   // Don't include chains that point at other candidates; they're covered transitively.
   13652   for (unsigned i = 0; i < NumStores; ++i) {
   13653     if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
   13654       Chains.push_back(StoreNodes[i].MemNode->getChain());
   13655   }
   13656 
   13657   assert(Chains.size() > 0 && "Store candidates must have at least one chain");
   13658   return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
   13659 }
   13660 
   13661 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
   13662     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
   13663     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
   13664   // Make sure we have something to merge.
   13665   if (NumStores < 2)
   13666     return false;
   13667 
   13668   // Use the debug location of the first store in the merge set.
   13669   SDLoc DL(StoreNodes[0].MemNode);
   13670 
   13671   int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
   13672   unsigned SizeInBits = NumStores * ElementSizeBits;
   13673   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
   13674 
   13675   EVT StoreTy;
   13676   if (UseVector) {
   13677     unsigned Elts = NumStores * NumMemElts;
   13678     // Get the type for the merged vector store.
   13679     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
   13680   } else
   13681     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
   13682 
   13683   SDValue StoredVal;
   13684   if (UseVector) {
   13685     if (IsConstantSrc) {
   13686       SmallVector<SDValue, 8> BuildVector;
   13687       for (unsigned I = 0; I != NumStores; ++I) {
   13688         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
   13689         SDValue Val = St->getValue();
   13690         // If constant is of the wrong type, convert it now.
   13691         if (MemVT != Val.getValueType()) {
   13692           Val = peekThroughBitcast(Val);
   13693           // Deal with constants of wrong size.
   13694           if (ElementSizeBits != Val.getValueSizeInBits()) {
   13695             EVT IntMemVT =
   13696                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
   13697             if (isa<ConstantFPSDNode>(Val)) {
   13698               // Not clear how to truncate FP values.
   13699               return false;
   13700             } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
   13701               Val = DAG.getConstant(C->getAPIntValue()
   13702                                         .zextOrTrunc(Val.getValueSizeInBits())
   13703                                         .zextOrTrunc(ElementSizeBits),
   13704                                     SDLoc(C), IntMemVT);
   13705           }
   13706           // Make sure the correctly sized value also has the correct type.
   13707           Val = DAG.getBitcast(MemVT, Val);
   13708         }
   13709         BuildVector.push_back(Val);
   13710       }
   13711       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
   13712                                                : ISD::BUILD_VECTOR,
   13713                               DL, StoreTy, BuildVector);
   13714     } else {
   13715       SmallVector<SDValue, 8> Ops;
   13716       for (unsigned i = 0; i < NumStores; ++i) {
   13717         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
   13718         SDValue Val = peekThroughBitcast(St->getValue());
   13719         // All operands of BUILD_VECTOR / CONCAT_VECTORS must be of
   13720         // type MemVT. If the underlying value is not the correct
   13721         // type, but it is an extraction of an appropriate vector we
   13722         // can recast Val to be of the correct type. This may require
   13723         // converting between EXTRACT_VECTOR_ELT and
   13724         // EXTRACT_SUBVECTOR.
   13725         if ((MemVT != Val.getValueType()) &&
   13726             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
   13727              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
   13728           SDValue Vec = Val.getOperand(0);
   13729           EVT MemVTScalarTy = MemVT.getScalarType();
   13730           // We may need to add a bitcast here to get types to line up.
   13731           if (MemVTScalarTy != Vec.getValueType()) {
   13732             unsigned Elts = Vec.getValueType().getSizeInBits() /
   13733                             MemVTScalarTy.getSizeInBits();
   13734             EVT NewVecTy =
   13735                 EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts);
   13736             Vec = DAG.getBitcast(NewVecTy, Vec);
   13737           }
   13738           auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR
   13739                                         : ISD::EXTRACT_VECTOR_ELT;
   13740           Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1));
   13741         }
   13742         Ops.push_back(Val);
   13743       }
   13744 
   13745       // Build the extracted vector elements back into a vector.
   13746       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
   13747                                                : ISD::BUILD_VECTOR,
   13748                               DL, StoreTy, Ops);
   13749     }
   13750   } else {
   13751     // We should always use a vector store when merging extracted vector
   13752     // elements, so this path implies a store of constants.
   13753     assert(IsConstantSrc && "Merged vector elements should use vector store");
   13754 
   13755     APInt StoreInt(SizeInBits, 0);
   13756 
   13757     // Construct a single integer constant which is made of the smaller
   13758     // constant inputs.
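             // For example (illustrative, little-endian): merging i8 stores of
             // 1, 2, 3, 4 to p[0]..p[3] visits them highest address first, so
             // StoreInt becomes 0x04030201, whose in-memory bytes 01 02 03 04
             // reproduce the four original stores.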
   13759     bool IsLE = DAG.getDataLayout().isLittleEndian();
   13760     for (unsigned i = 0; i < NumStores; ++i) {
   13761       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
   13762       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
   13763 
   13764       SDValue Val = St->getValue();
   13765       Val = peekThroughBitcast(Val);
   13766       StoreInt <<= ElementSizeBits;
   13767       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
   13768         StoreInt |= C->getAPIntValue()
   13769                         .zextOrTrunc(ElementSizeBits)
   13770                         .zextOrTrunc(SizeInBits);
   13771       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
   13772         StoreInt |= C->getValueAPF()
   13773                         .bitcastToAPInt()
   13774                         .zextOrTrunc(ElementSizeBits)
   13775                         .zextOrTrunc(SizeInBits);
   13776         // If FP truncation is necessary, give up for now.
   13777         if (MemVT.getSizeInBits() != ElementSizeBits)
   13778           return false;
   13779       } else {
   13780         llvm_unreachable("Invalid constant element type");
   13781       }
   13782     }
   13783 
   13784     // Create the new Load and Store operations.
   13785     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
   13786   }
   13787 
   13788   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
   13789   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
   13790 
   13791   // Make sure we use a trunc store if it's necessary for the store to be legal.
   13792   SDValue NewStore;
   13793   if (!UseTrunc) {
   13794     NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
   13795                             FirstInChain->getPointerInfo(),
   13796                             FirstInChain->getAlignment());
   13797   } else { // Must be realized as a trunc store
   13798     EVT LegalizedStoredValTy =
   13799         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
   13800     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
   13801     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
   13802     SDValue ExtendedStoreVal =
   13803         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
   13804                         LegalizedStoredValTy);
   13805     NewStore = DAG.getTruncStore(
   13806         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
   13807         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
   13808         FirstInChain->getAlignment(),
   13809         FirstInChain->getMemOperand()->getFlags());
   13810   }
   13811 
   13812   // Replace all merged stores with the new store.
   13813   for (unsigned i = 0; i < NumStores; ++i)
   13814     CombineTo(StoreNodes[i].MemNode, NewStore);
   13815 
   13816   AddToWorklist(NewChain.getNode());
   13817   return true;
   13818 }
   13819 
   13820 void DAGCombiner::getStoreMergeCandidates(
   13821     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
   13822     SDNode *&RootNode) {
   13823   // This holds the base pointer, index, and the offset in bytes from the base
   13824   // pointer.
   13825   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
   13826   EVT MemVT = St->getMemoryVT();
   13827 
   13828   SDValue Val = peekThroughBitcast(St->getValue());
   13829   // We must have a base and an offset.
   13830   if (!BasePtr.getBase().getNode())
   13831     return;
   13832 
   13833   // Do not handle stores to undef base pointers.
   13834   if (BasePtr.getBase().isUndef())
   13835     return;
   13836 
   13837   bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
   13838   bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
   13839                           Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
   13840   bool IsLoadSrc = isa<LoadSDNode>(Val);
   13841   BaseIndexOffset LBasePtr;
   13842   // Match on loadbaseptr if relevant.
   13843   EVT LoadVT;
   13844   if (IsLoadSrc) {
   13845     auto *Ld = cast<LoadSDNode>(Val);
   13846     LBasePtr = BaseIndexOffset::match(Ld, DAG);
   13847     LoadVT = Ld->getMemoryVT();
   13848     // Load and store should be the same type.
   13849     if (MemVT != LoadVT)
   13850       return;
   13851     // Loads must only have one use.
   13852     if (!Ld->hasNUsesOfValue(1, 0))
   13853       return;
   13854     // The memory operands must not be volatile.
   13855     if (Ld->isVolatile() || Ld->isIndexed())
   13856       return;
   13857   }
   13858   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
   13859                             int64_t &Offset) -> bool {
   13860     if (Other->isVolatile() || Other->isIndexed())
   13861       return false;
   13862     SDValue Val = peekThroughBitcast(Other->getValue());
   13863     // Allow merging constants of different types as integers.
   13864     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
   13865                                            : Other->getMemoryVT() != MemVT;
   13866     if (IsLoadSrc) {
   13867       if (NoTypeMatch)
   13868         return false;
   13869       // The Load's Base Ptr must also match
   13870       if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
   13871         auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
   13872         if (LoadVT != OtherLd->getMemoryVT())
   13873           return false;
   13874         // Loads must only have one use.
   13875         if (!OtherLd->hasNUsesOfValue(1, 0))
   13876           return false;
   13877         // The memory operands must not be volatile.
   13878         if (OtherLd->isVolatile() || OtherLd->isIndexed())
   13879           return false;
   13880         if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
   13881           return false;
   13882       } else
   13883         return false;
   13884     }
   13885     if (IsConstantSrc) {
   13886       if (NoTypeMatch)
   13887         return false;
   13888       if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
   13889         return false;
   13890     }
   13891     if (IsExtractVecSrc) {
   13892       // Do not merge truncated stores here.
   13893       if (Other->isTruncatingStore())
   13894         return false;
   13895       if (!MemVT.bitsEq(Val.getValueType()))
   13896         return false;
   13897       if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
   13898           Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
   13899         return false;
   13900     }
   13901     Ptr = BaseIndexOffset::match(Other, DAG);
   13902     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
   13903   };
   13904 
   13905   // We are looking for a root node which is an ancestor to all mergeable
   13906   // stores. We search up through a load, to our root and then down
   13907   // through all children. For instance we will find Store{1,2,3} if
   13908   // St is Store1, Store2, or Store3 where the root is not a load,
   13909   // which is always true for nonvolatile ops. TODO: Expand
   13910   // the search to find all valid candidates through multiple layers of loads.
   13911   //
   13912   // Root
   13913   // |-------|-------|
   13914   // Load    Load    Store3
   13915   // |       |
   13916   // Store1   Store2
   13917   //
   13918   // FIXME: We should be able to climb and
   13919   // descend TokenFactors to find candidates as well.
   13920 
   13921   RootNode = St->getChain().getNode();
   13922 
   13923   if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
   13924     RootNode = Ldn->getChain().getNode();
   13925     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
   13926       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
   13927         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
   13928           if (I2.getOperandNo() == 0)
   13929             if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
   13930               BaseIndexOffset Ptr;
   13931               int64_t PtrDiff;
   13932               if (CandidateMatch(OtherST, Ptr, PtrDiff))
   13933                 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
   13934             }
   13935   } else
   13936     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
   13937       if (I.getOperandNo() == 0)
   13938         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
   13939           BaseIndexOffset Ptr;
   13940           int64_t PtrDiff;
   13941           if (CandidateMatch(OtherST, Ptr, PtrDiff))
   13942             StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
   13943         }
   13944 }
   13945 
   13946 // We need to check that merging these stores does not cause a loop in
   13947 // the DAG. Any store candidate may depend on another candidate
   13948 // indirectly through its operand (we already consider dependencies
   13949 // through the chain). Check in parallel by searching up from
   13950 // non-chain operands of candidates.
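         // For instance (a hypothetical shape): if candidate St1 stores
         // (add (load q), 1) and that load's chain reaches candidate St2, the
         // merged store would depend, through the add and the load, on a store
         // it replaces -- a cycle. The search below rejects such candidate sets.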
   13951 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
   13952     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
   13953     SDNode *RootNode) {
   13954   // FIXME: We should be able to truncate a full search of
   13955   // predecessors by doing a BFS and keeping tabs on the originating
   13956   // stores from which worklist nodes come, in a similar way to
   13957   // TokenFactor simplification.
   13958 
   13959   SmallPtrSet<const SDNode *, 32> Visited;
   13960   SmallVector<const SDNode *, 8> Worklist;
   13961 
   13962   // RootNode is a predecessor to all candidates so we need not search
   13963   // past it. Add RootNode (peeking through TokenFactors). Do not count
   13964   // these towards the size check.
   13965 
   13966   Worklist.push_back(RootNode);
   13967   while (!Worklist.empty()) {
   13968     auto N = Worklist.pop_back_val();
   13969     if (N->getOpcode() == ISD::TokenFactor) {
   13970       for (SDValue Op : N->ops())
   13971         Worklist.push_back(Op.getNode());
   13972     }
   13973     Visited.insert(N);
   13974   }
   13975 
   13976   // Don't count pruning nodes towards max.
   13977   unsigned int Max = 1024 + Visited.size();
   13978   // Search Ops of store candidates.
   13979   for (unsigned i = 0; i < NumStores; ++i) {
   13980     SDNode *N = StoreNodes[i].MemNode;
   13981     // Of the 4 Store Operands:
   13982     //   * Chain (Op 0) -> These were already considered in
   13983     //                    candidate selection and can be
   13984     //                    safely ignored.
   13985     //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
   13986     //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant
   13987     //                      and so no cycles are possible.
   13988     //   * (Op 3) -> appears to always be undef. Cannot be source of cycle.
   13989     //
   13990     // Thus we need only check predecessors of the value operands.
   13991     auto *Op = N->getOperand(1).getNode();
   13992     if (Visited.insert(Op).second)
   13993       Worklist.push_back(Op);
   13994   }
   13995   // Search through DAG. We can stop early if we find a store node.
   13996   for (unsigned i = 0; i < NumStores; ++i)
   13997     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
   13998                                      Max))
   13999       return false;
   14000   return true;
   14001 }
   14002 
   14003 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
   14004   if (OptLevel == CodeGenOpt::None)
   14005     return false;
   14006 
   14007   EVT MemVT = St->getMemoryVT();
   14008   int64_t ElementSizeBytes = MemVT.getStoreSize();
   14009   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
   14010 
   14011   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
   14012     return false;
   14013 
   14014   bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
   14015       Attribute::NoImplicitFloat);
   14016 
   14017   // This function cannot currently deal with non-byte-sized memory sizes.
   14018   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
   14019     return false;
   14020 
   14021   if (!MemVT.isSimple())
   14022     return false;
   14023 
   14024   // Perform an early exit check. Do not bother looking at stored values that
   14025   // are not constants, loads, or extracted vector elements.
   14026   SDValue StoredVal = peekThroughBitcast(St->getValue());
   14027   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
   14028   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
   14029                        isa<ConstantFPSDNode>(StoredVal);
   14030   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
   14031                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
   14032 
   14033   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
   14034     return false;
   14035 
   14036   SmallVector<MemOpLink, 8> StoreNodes;
   14037   SDNode *RootNode;
   14038   // Find potential store merge candidates by searching through the chain sub-DAG.
   14039   getStoreMergeCandidates(St, StoreNodes, RootNode);
   14040 
   14041   // Check if there is anything to merge.
   14042   if (StoreNodes.size() < 2)
   14043     return false;
   14044 
   14045   // Sort the memory operands according to their distance from the
   14046   // base pointer.
   14047   llvm::sort(StoreNodes.begin(), StoreNodes.end(),
   14048              [](MemOpLink LHS, MemOpLink RHS) {
   14049                return LHS.OffsetFromBase < RHS.OffsetFromBase;
   14050              });
   14051 
   14052   // Store merging attempts to merge the lowest-addressed stores first. This
   14053   // generally works out: when a merge succeeds, the remaining stores are
   14054   // re-checked after the merged collection is removed. However, in the
   14055   // case that a non-mergeable store is found first, e.g., {p[-2],
   14056   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
   14057   // mergeable cases. To prevent this, we prune such stores from the
   14058   // front of StoreNodes here.
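           // Concretely (offsets illustrative, 4-byte elements): given offsets
           // {-8, 0, 4, 8}, the scan below sees -8 + 4 != 0, advances StartIdx
           // past the stray store at -8, and continues with the run {0, 4, 8}.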
   14059 
   14060   bool RV = false;
   14061   while (StoreNodes.size() > 1) {
   14062     unsigned StartIdx = 0;
   14063     while ((StartIdx + 1 < StoreNodes.size()) &&
   14064            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
   14065                StoreNodes[StartIdx + 1].OffsetFromBase)
   14066       ++StartIdx;
   14067 
   14068     // Bail if we don't have enough candidates to merge.
   14069     if (StartIdx + 1 >= StoreNodes.size())
   14070       return RV;
   14071 
   14072     if (StartIdx)
   14073       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
   14074 
   14075     // Scan the memory operations on the chain and find the first
   14076     // non-consecutive store memory address.
   14077     unsigned NumConsecutiveStores = 1;
   14078     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
   14079     // Check that the addresses are consecutive starting from the second
   14080     // element in the list of stores.
   14081     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
   14082       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
   14083       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
   14084         break;
   14085       NumConsecutiveStores = i + 1;
   14086     }
   14087 
   14088     if (NumConsecutiveStores < 2) {
   14089       StoreNodes.erase(StoreNodes.begin(),
   14090                        StoreNodes.begin() + NumConsecutiveStores);
   14091       continue;
   14092     }
   14093 
   14094     // After sorting, StoreNodes[0] is the node with the lowest store address.
   14095     LLVMContext &Context = *DAG.getContext();
   14096     const DataLayout &DL = DAG.getDataLayout();
   14097 
   14098     // Store the constants into memory as one consecutive store.
   14099     if (IsConstantSrc) {
   14100       while (NumConsecutiveStores >= 2) {
   14101         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
   14102         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
   14103         unsigned FirstStoreAlign = FirstInChain->getAlignment();
   14104         unsigned LastLegalType = 1;
   14105         unsigned LastLegalVectorType = 1;
   14106         bool LastIntegerTrunc = false;
   14107         bool NonZero = false;
   14108         unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
   14109         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
   14110           StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
   14111           SDValue StoredVal = ST->getValue();
   14112           bool IsElementZero = false;
   14113           if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
   14114             IsElementZero = C->isNullValue();
   14115           else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
   14116             IsElementZero = C->getConstantFPValue()->isNullValue();
   14117           if (IsElementZero) {
   14118             if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
   14119               FirstZeroAfterNonZero = i;
   14120           }
   14121           NonZero |= !IsElementZero;
   14122 
   14123           // Find a legal type for the constant store.
   14124           unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
   14125           EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
   14126           bool IsFast = false;
   14127 
   14128           // Break early when size is too large to be legal.
   14129           if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
   14130             break;
   14131 
   14132           if (TLI.isTypeLegal(StoreTy) &&
   14133               TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
   14134               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
   14135                                      FirstStoreAlign, &IsFast) &&
   14136               IsFast) {
   14137             LastIntegerTrunc = false;
   14138             LastLegalType = i + 1;
   14139             // Or check whether a truncstore is legal.
   14140           } else if (TLI.getTypeAction(Context, StoreTy) ==
   14141                      TargetLowering::TypePromoteInteger) {
   14142             EVT LegalizedStoredValTy =
   14143                 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
   14144             if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
   14145                 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
   14146                 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
   14147                                        FirstStoreAlign, &IsFast) &&
   14148                 IsFast) {
   14149               LastIntegerTrunc = true;
   14150               LastLegalType = i + 1;
   14151             }
   14152           }
   14153 
   14154           // We only use vectors if either the constants are all zero or the
   14155           // target says a vector store of this constant is cheap, and the
   14156           // function is not marked with the noimplicitfloat attribute.
   14157           if ((!NonZero ||
   14158                TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
   14159               !NoVectors) {
   14160             // Find a legal type for the vector store.
   14161             unsigned Elts = (i + 1) * NumMemElts;
   14162             EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
   14163             if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
   14164                 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
   14165                 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
   14166                                        FirstStoreAlign, &IsFast) &&
   14167                 IsFast)
   14168               LastLegalVectorType = i + 1;
   14169           }
   14170         }
   14171 
   14172         bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
   14173         unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
   14174 
   14175         // Check if we found a legal integer or vector type that creates a
   14176         // meaningful merge.
   14177         if (NumElem < 2) {
   14178           // We know that candidate stores are in order and of correct
   14179           // shape. While there is no mergeable sequence from the
   14180           // beginning, one may start later in the sequence. The only
   14181           // reason a merge of size N could have failed where another of
   14182           // the same size would not have, is if the alignment has
   14183           // improved or we've dropped a non-zero value. Drop as many
   14184           // candidates as we can here.
   14185           unsigned NumSkip = 1;
   14186           while (
   14187               (NumSkip < NumConsecutiveStores) &&
   14188               (NumSkip < FirstZeroAfterNonZero) &&
   14189               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
   14190             NumSkip++;
   14191 
   14192           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
   14193           NumConsecutiveStores -= NumSkip;
   14194           continue;
   14195         }
   14196 
   14197         // Check that we can merge these candidates without causing a cycle.
   14198         if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
   14199                                                       RootNode)) {
   14200           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
   14201           NumConsecutiveStores -= NumElem;
   14202           continue;
   14203         }
   14204 
   14205         RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
   14206                                               UseVector, LastIntegerTrunc);
   14207 
   14208         // Remove merged stores for next iteration.
   14209         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
   14210         NumConsecutiveStores -= NumElem;
   14211       }
   14212       continue;
   14213     }
   14214 
   14215     // When extracting multiple vector elements, try to store them
   14216     // in one vector store rather than a sequence of scalar stores.
   14217     if (IsExtractVecSrc) {
   14218       // Loop over the consecutive stores, merging a batch on each iteration.
   14219       while (NumConsecutiveStores >= 2) {
   14220         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
   14221         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
   14222         unsigned FirstStoreAlign = FirstInChain->getAlignment();
   14223         unsigned NumStoresToMerge = 1;
   14224         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
   14225           // Find a legal type for the vector store.
   14226           unsigned Elts = (i + 1) * NumMemElts;
   14227           EVT Ty =
   14228               EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
   14229           bool IsFast;
   14230 
   14231           // Break early when size is too large to be legal.
   14232           if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
   14233             break;
   14234 
   14235           if (TLI.isTypeLegal(Ty) &&
   14236               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
   14237               TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
   14238                                      FirstStoreAlign, &IsFast) &&
   14239               IsFast)
   14240             NumStoresToMerge = i + 1;
   14241         }
   14242 
   14243         // Check if we found a legal vector type that creates a meaningful
   14244         // merge.
   14245         if (NumStoresToMerge < 2) {
   14246           // We know that candidate stores are in order and of correct
   14247           // shape. While there is no mergeable sequence from the
   14248         // beginning, one may start later in the sequence. The only
   14249           // reason a merge of size N could have failed where another of
   14250           // the same size would not have, is if the alignment has
   14251           // improved. Drop as many candidates as we can here.
   14252           unsigned NumSkip = 1;
   14253           while (
   14254               (NumSkip < NumConsecutiveStores) &&
   14255               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
   14256             NumSkip++;
   14257 
   14258           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
   14259           NumConsecutiveStores -= NumSkip;
   14260           continue;
   14261         }
   14262 
   14263         // Check that we can merge these candidates without causing a cycle.
   14264         if (!checkMergeStoreCandidatesForDependencies(
   14265                 StoreNodes, NumStoresToMerge, RootNode)) {
   14266           StoreNodes.erase(StoreNodes.begin(),
   14267                            StoreNodes.begin() + NumStoresToMerge);
   14268           NumConsecutiveStores -= NumStoresToMerge;
   14269           continue;
   14270         }
   14271 
   14272         RV |= MergeStoresOfConstantsOrVecElts(
   14273             StoreNodes, MemVT, NumStoresToMerge, false, true, false);
   14274 
   14275         StoreNodes.erase(StoreNodes.begin(),
   14276                          StoreNodes.begin() + NumStoresToMerge);
   14277         NumConsecutiveStores -= NumStoresToMerge;
   14278       }
   14279       continue;
   14280     }
   14281 
   14282     // Below we handle the case of multiple consecutive stores that
   14283     // come from multiple consecutive loads. We merge them into a single
   14284     // wide load and a single wide store.
   14285 
   14286     // Look for load nodes which are used by the stored values.
   14287     SmallVector<MemOpLink, 8> LoadNodes;
   14288 
   14289     // Find acceptable loads. Loads need to have the same chain (token factor),
   14290     // must not be zext, volatile, or indexed, and they must be consecutive.
   14291     BaseIndexOffset LdBasePtr;
   14292 
   14293     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
   14294       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
   14295       SDValue Val = peekThroughBitcast(St->getValue());
   14296       LoadSDNode *Ld = cast<LoadSDNode>(Val);
   14297 
   14298       BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
   14299       // If this is not the first ptr that we check.
   14300       int64_t LdOffset = 0;
   14301       if (LdBasePtr.getBase().getNode()) {
   14302         // The base ptr must be the same.
   14303         if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
   14304           break;
   14305       } else {
   14306         // Remember the first base pointer; all other loads must match it.
   14307         LdBasePtr = LdPtr;
   14308       }
   14309 
   14310       // We found a potential memory operand to merge.
   14311       LoadNodes.push_back(MemOpLink(Ld, LdOffset));
   14312     }
   14313 
   14314     while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
   14315       // If we have load/store pair instructions and we only have two values,
   14316       // don't bother merging.
   14317       unsigned RequiredAlignment;
   14318       if (LoadNodes.size() == 2 &&
   14319           TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
   14320           StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
   14321         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
   14322         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
   14323         break;
   14324       }
   14325       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
   14326       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
   14327       unsigned FirstStoreAlign = FirstInChain->getAlignment();
   14328       LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
   14329       unsigned FirstLoadAS = FirstLoad->getAddressSpace();
   14330       unsigned FirstLoadAlign = FirstLoad->getAlignment();
   14331 
   14332       // Scan the memory operations on the chain and find the first
   14333       // non-consecutive load memory address. These variables hold the index in
   14334       // the store node array.
   14335 
   14336       unsigned LastConsecutiveLoad = 1;
   14337 
   14338       // These variables refer to a size (count), not an index in the array.
   14339       unsigned LastLegalVectorType = 1;
   14340       unsigned LastLegalIntegerType = 1;
   14341       bool isDereferenceable = true;
   14342       bool DoIntegerTruncate = false;
   14343       StartAddress = LoadNodes[0].OffsetFromBase;
   14344       SDValue FirstChain = FirstLoad->getChain();
   14345       for (unsigned i = 1; i < LoadNodes.size(); ++i) {
   14346         // All loads must share the same chain.
   14347         if (LoadNodes[i].MemNode->getChain() != FirstChain)
   14348           break;
   14349 
   14350         int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
   14351         if (CurrAddress - StartAddress != (ElementSizeBytes * i))
   14352           break;
   14353         LastConsecutiveLoad = i;
   14354 
   14355         if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
   14356           isDereferenceable = false;
   14357 
   14358         // Find a legal type for the vector store.
   14359         unsigned Elts = (i + 1) * NumMemElts;
   14360         EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
   14361 
   14362         // Break early when size is too large to be legal.
   14363         if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
   14364           break;
   14365 
   14366         bool IsFastSt, IsFastLd;
   14367         if (TLI.isTypeLegal(StoreTy) &&
   14368             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
   14369             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
   14370                                    FirstStoreAlign, &IsFastSt) &&
   14371             IsFastSt &&
   14372             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
   14373                                    FirstLoadAlign, &IsFastLd) &&
   14374             IsFastLd) {
   14375           LastLegalVectorType = i + 1;
   14376         }
   14377 
   14378         // Find a legal type for the integer store.
   14379         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
   14380         StoreTy = EVT::getIntegerVT(Context, SizeInBits);
   14381         if (TLI.isTypeLegal(StoreTy) &&
   14382             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
   14383             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
   14384                                    FirstStoreAlign, &IsFastSt) &&
   14385             IsFastSt &&
   14386             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
   14387                                    FirstLoadAlign, &IsFastLd) &&
   14388             IsFastLd) {
   14389           LastLegalIntegerType = i + 1;
   14390           DoIntegerTruncate = false;
   14391           // Or check whether a truncstore and extload is legal.
   14392         } else if (TLI.getTypeAction(Context, StoreTy) ==
   14393                    TargetLowering::TypePromoteInteger) {
   14394           EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
   14395           if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
   14396               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
   14397               TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
   14398                                  StoreTy) &&
   14399               TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
   14400                                  StoreTy) &&
   14401               TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
   14402               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
   14403                                      FirstStoreAlign, &IsFastSt) &&
   14404               IsFastSt &&
   14405               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
   14406                                      FirstLoadAlign, &IsFastLd) &&
   14407               IsFastLd) {
   14408             LastLegalIntegerType = i + 1;
   14409             DoIntegerTruncate = true;
   14410           }
   14411         }
   14412       }
   14413 
   14414       // Only use vector types if the vector type is larger than the integer
   14415       // type. If they are the same, use integers.
   14416       bool UseVectorTy =
   14417           LastLegalVectorType > LastLegalIntegerType && !NoVectors;
   14418       unsigned LastLegalType =
   14419           std::max(LastLegalVectorType, LastLegalIntegerType);
   14420 
   14421       // We add +1 here because LastConsecutiveLoad holds a zero-based index
   14422       // (a location), while NumElem is a count of elements.
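               // E.g. if LastConsecutiveLoad == 2, the loads at indices 0..2 are
               // consecutive, so up to three load/store pairs may be merged.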
   14423       unsigned NumElem =
   14424           std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
   14425       NumElem = std::min(LastLegalType, NumElem);
   14426 
   14427       if (NumElem < 2) {
   14428         // We know that candidate stores are in order and of correct
   14429         // shape. While there is no mergeable sequence from the
   14430         // beginning, one may start later in the sequence. The only
   14431         // reason a merge of size N could have failed where another of
   14432         // the same size would not have is if the alignment of either
   14433         // the load or store has improved. Drop as many candidates as we
   14434         // can here.
   14435         unsigned NumSkip = 1;
   14436         while ((NumSkip < LoadNodes.size()) &&
   14437                (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
   14438                (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
   14439           NumSkip++;
   14440         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
   14441         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
   14442         NumConsecutiveStores -= NumSkip;
   14443         continue;
   14444       }
   14445 
   14446       // Check that we can merge these candidates without causing a cycle.
   14447       if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
   14448                                                     RootNode)) {
   14449         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
   14450         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
   14451         NumConsecutiveStores -= NumElem;
   14452         continue;
   14453       }
   14454 
    14455       // Determine whether it is better to use vectors or integers to load
    14456       // and store to memory.
   14457       EVT JointMemOpVT;
   14458       if (UseVectorTy) {
   14459         // Find a legal type for the vector store.
   14460         unsigned Elts = NumElem * NumMemElts;
   14461         JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
   14462       } else {
   14463         unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
   14464         JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
   14465       }
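      // For instance, merging four scalar f32 load/store pairs (MemVT = f32)
      // yields JointMemOpVT = v4f32 in the vector case and i128 in the
      // integer case.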
   14466 
   14467       SDLoc LoadDL(LoadNodes[0].MemNode);
   14468       SDLoc StoreDL(StoreNodes[0].MemNode);
   14469 
   14470       // The merged loads are required to have the same incoming chain, so
   14471       // using the first's chain is acceptable.
   14472 
   14473       SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
   14474       AddToWorklist(NewStoreChain.getNode());
   14475 
   14476       MachineMemOperand::Flags MMOFlags =
   14477           isDereferenceable ? MachineMemOperand::MODereferenceable
   14478                             : MachineMemOperand::MONone;
   14479 
   14480       SDValue NewLoad, NewStore;
   14481       if (UseVectorTy || !DoIntegerTruncate) {
   14482         NewLoad =
   14483             DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
   14484                         FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
   14485                         FirstLoadAlign, MMOFlags);
   14486         NewStore = DAG.getStore(
   14487             NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
   14488             FirstInChain->getPointerInfo(), FirstStoreAlign);
   14489       } else { // This must be the truncstore/extload case
   14490         EVT ExtendedTy =
   14491             TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
   14492         NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
   14493                                  FirstLoad->getChain(), FirstLoad->getBasePtr(),
   14494                                  FirstLoad->getPointerInfo(), JointMemOpVT,
   14495                                  FirstLoadAlign, MMOFlags);
   14496         NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
   14497                                      FirstInChain->getBasePtr(),
   14498                                      FirstInChain->getPointerInfo(),
   14499                                      JointMemOpVT, FirstInChain->getAlignment(),
   14500                                      FirstInChain->getMemOperand()->getFlags());
   14501       }
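      // E.g. if JointMemOpVT is i48 on a target that promotes i48 to i64, the
      // merged sequence is an extending i48 load producing an i64 value,
      // followed by an i64-to-i48 truncating store.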
   14502 
   14503       // Transfer chain users from old loads to the new load.
   14504       for (unsigned i = 0; i < NumElem; ++i) {
   14505         LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
   14506         DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
   14507                                       SDValue(NewLoad.getNode(), 1));
   14508       }
   14509 
    14510       // Replace all the stores with the new store. Recursively remove the
    14511       // corresponding value if it is no longer used.
   14512       for (unsigned i = 0; i < NumElem; ++i) {
   14513         SDValue Val = StoreNodes[i].MemNode->getOperand(1);
   14514         CombineTo(StoreNodes[i].MemNode, NewStore);
   14515         if (Val.getNode()->use_empty())
   14516           recursivelyDeleteUnusedNodes(Val.getNode());
   14517       }
   14518 
   14519       RV = true;
   14520       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
   14521       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
   14522       NumConsecutiveStores -= NumElem;
   14523     }
   14524   }
   14525   return RV;
   14526 }
   14527 
   14528 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
   14529   SDLoc SL(ST);
   14530   SDValue ReplStore;
   14531 
   14532   // Replace the chain to avoid dependency.
   14533   if (ST->isTruncatingStore()) {
   14534     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
   14535                                   ST->getBasePtr(), ST->getMemoryVT(),
   14536                                   ST->getMemOperand());
   14537   } else {
   14538     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
   14539                              ST->getMemOperand());
   14540   }
   14541 
   14542   // Create token to keep both nodes around.
   14543   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
   14544                               MVT::Other, ST->getChain(), ReplStore);
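  // The TokenFactor produces a chain that depends on all of its chain
  // operands, so both the store's original chain and the replacement store
  // remain live.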
   14545 
   14546   // Make sure the new and old chains are cleaned up.
   14547   AddToWorklist(Token.getNode());
   14548 
   14549   // Don't add users to work list.
   14550   return CombineTo(ST, Token, false);
   14551 }
   14552 
   14553 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
   14554   SDValue Value = ST->getValue();
   14555   if (Value.getOpcode() == ISD::TargetConstantFP)
   14556     return SDValue();
   14557 
   14558   SDLoc DL(ST);
   14559 
   14560   SDValue Chain = ST->getChain();
   14561   SDValue Ptr = ST->getBasePtr();
   14562 
   14563   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
   14564 
   14565   // NOTE: If the original store is volatile, this transform must not increase
   14566   // the number of stores.  For example, on x86-32 an f64 can be stored in one
   14567   // processor operation but an i64 (which is not legal) requires two.  So the
   14568   // transform should not be done in this case.
   14569 
   14570   SDValue Tmp;
   14571   switch (CFP->getSimpleValueType(0).SimpleTy) {
   14572   default:
   14573     llvm_unreachable("Unknown FP type");
   14574   case MVT::f16:    // We don't do this for these yet.
   14575   case MVT::f80:
   14576   case MVT::f128:
   14577   case MVT::ppcf128:
   14578     return SDValue();
   14579   case MVT::f32:
   14580     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
   14581         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
   14583       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
   14584                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
   14585                             MVT::i32);
   14586       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
   14587     }
   14588 
   14589     return SDValue();
   14590   case MVT::f64:
   14591     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
   14592          !ST->isVolatile()) ||
   14593         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
   14595       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
   14596                             getZExtValue(), SDLoc(CFP), MVT::i64);
   14597       return DAG.getStore(Chain, DL, Tmp,
   14598                           Ptr, ST->getMemOperand());
   14599     }
   14600 
   14601     if (!ST->isVolatile() &&
   14602         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
   14603       // Many FP stores are not made apparent until after legalize, e.g. for
   14604       // argument passing.  Since this is so common, custom legalize the
   14605       // 64-bit integer store into two 32-bit stores.
   14606       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
   14607       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
   14608       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
   14609       if (DAG.getDataLayout().isBigEndian())
   14610         std::swap(Lo, Hi);
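      // Illustrative example: for the f64 bit pattern 0x0123456789ABCDEF,
      // little-endian stores Lo = 0x89ABCDEF at Ptr and Hi = 0x01234567 at
      // Ptr+4; big-endian targets swap the halves so the high word lands at
      // the lower address.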
   14611 
   14612       unsigned Alignment = ST->getAlignment();
   14613       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
   14614       AAMDNodes AAInfo = ST->getAAInfo();
   14615 
   14616       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
   14617                                  ST->getAlignment(), MMOFlags, AAInfo);
   14618       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
   14619                         DAG.getConstant(4, DL, Ptr.getValueType()));
   14620       Alignment = MinAlign(Alignment, 4U);
   14621       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
   14622                                  ST->getPointerInfo().getWithOffset(4),
   14623                                  Alignment, MMOFlags, AAInfo);
   14624       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
   14625                          St0, St1);
   14626     }
   14627 
   14628     return SDValue();
   14629   }
   14630 }
   14631 
   14632 SDValue DAGCombiner::visitSTORE(SDNode *N) {
   14633   StoreSDNode *ST  = cast<StoreSDNode>(N);
   14634   SDValue Chain = ST->getChain();
   14635   SDValue Value = ST->getValue();
   14636   SDValue Ptr   = ST->getBasePtr();
   14637 
   14638   // If this is a store of a bit convert, store the input value if the
   14639   // resultant store does not need a higher alignment than the original.
   14640   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
   14641       ST->isUnindexed()) {
   14642     EVT SVT = Value.getOperand(0).getValueType();
   14643     if (((!LegalOperations && !ST->isVolatile()) ||
   14644          TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
   14645         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
   14646       unsigned OrigAlign = ST->getAlignment();
   14647       bool Fast = false;
   14648       if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
   14649                                  ST->getAddressSpace(), OrigAlign, &Fast) &&
   14650           Fast) {
   14651         return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
   14652                             ST->getPointerInfo(), OrigAlign,
   14653                             ST->getMemOperand()->getFlags(), ST->getAAInfo());
   14654       }
   14655     }
   14656   }
   14657 
   14658   // Turn 'store undef, Ptr' -> nothing.
   14659   if (Value.isUndef() && ST->isUnindexed())
   14660     return Chain;
   14661 
   14662   // Try to infer better alignment information than the store already has.
   14663   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
   14664     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
   14665       if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
   14666         SDValue NewStore =
   14667             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
   14668                               ST->getMemoryVT(), Align,
   14669                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
   14670         // NewStore will always be N as we are only refining the alignment
   14671         assert(NewStore.getNode() == N);
   14672         (void)NewStore;
   14673       }
   14674     }
   14675   }
   14676 
   14677   // Try transforming a pair floating point load / store ops to integer
   14678   // load / store ops.
   14679   if (SDValue NewST = TransformFPLoadStorePair(N))
   14680     return NewST;
   14681 
   14682   if (ST->isUnindexed()) {
   14683     // Walk up chain skipping non-aliasing memory nodes, on this store and any
   14684     // adjacent stores.
   14685     if (findBetterNeighborChains(ST)) {
    14686       // replaceStoreChain uses CombineTo, which handles all of the worklist
    14687       // manipulation, so return the original node and do nothing else.
   14688       return SDValue(ST, 0);
   14689     }
   14690     Chain = ST->getChain();
   14691   }
   14692 
   14693   // FIXME: is there such a thing as a truncating indexed store?
   14694   if (ST->isTruncatingStore() && ST->isUnindexed() &&
   14695       Value.getValueType().isInteger()) {
   14696     // See if we can simplify the input to this truncstore with knowledge that
   14697     // only the low bits are being used.  For example:
   14698     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
   14699     SDValue Shorter = DAG.GetDemandedBits(
   14700         Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
   14701                                     ST->getMemoryVT().getScalarSizeInBits()));
   14702     AddToWorklist(Value.getNode());
   14703     if (Shorter.getNode())
   14704       return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
   14705                                Ptr, ST->getMemoryVT(), ST->getMemOperand());
   14706 
   14707     // Otherwise, see if we can simplify the operation with
   14708     // SimplifyDemandedBits, which only works if the value has a single use.
   14709     if (SimplifyDemandedBits(
   14710             Value,
   14711             APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
   14712                                  ST->getMemoryVT().getScalarSizeInBits()))) {
    14713       // Re-visit the store if anything changed and the store hasn't been merged
    14714       // with another node (in which case N is deleted). SimplifyDemandedBits will
    14715       // add Value's node back to the worklist if necessary, but we also need to
    14716       // re-visit the store node itself.
   14717       if (N->getOpcode() != ISD::DELETED_NODE)
   14718         AddToWorklist(N);
   14719       return SDValue(N, 0);
   14720     }
   14721   }
   14722 
   14723   // If this is a load followed by a store to the same location, then the store
   14724   // is dead/noop.
   14725   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
   14726     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
   14727         ST->isUnindexed() && !ST->isVolatile() &&
   14728         // There can't be any side effects between the load and store, such as
   14729         // a call or store.
   14730         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
   14731       // The store is dead, remove it.
   14732       return Chain;
   14733     }
   14734   }
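  // E.g. "%t = load i32, ptr; store i32 %t, ptr" with no intervening side
  // effects on the chain: the store is a no-op, so its chain is returned.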
   14735 
   14736   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
   14737     if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
   14738         !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
   14739         ST->getMemoryVT() == ST1->getMemoryVT()) {
   14740       // If this is a store followed by a store with the same value to the same
   14741       // location, then the store is dead/noop.
   14742       if (ST1->getValue() == Value) {
   14743         // The store is dead, remove it.
   14744         return Chain;
   14745       }
   14746 
    14747       // If the preceding store is to the same location and no other node
    14748       // is chained to that store, the preceding store is fully overwritten
    14749       // and can effectively be dropped. Do not remove stores to undef as
    14750       // they may be used as data sinks.
   14751       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
   14752           !ST1->getBasePtr().isUndef()) {
    14753         // ST1 is fully overwritten and can be elided. Combine with its chain
    14754         // value.
   14755         CombineTo(ST1, ST1->getChain());
   14756         return SDValue();
   14757       }
   14758     }
   14759   }
   14760 
   14761   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
   14762   // truncating store.  We can do this even if this is already a truncstore.
   14763   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
   14764       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
   14765       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
   14766                             ST->getMemoryVT())) {
   14767     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
   14768                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
   14769   }
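  // E.g. (store (truncate i32 %x to i16), ptr) becomes a truncating store of
  // the i32 value %x with memory type i16; likewise (store (fp_round f64 %y
  // to f32), ptr) becomes an f32-truncating store of %y.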
   14770 
   14771   // Always perform this optimization before types are legal. If the target
   14772   // prefers, also try this after legalization to catch stores that were created
   14773   // by intrinsics or other nodes.
   14774   if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
   14775     while (true) {
   14776       // There can be multiple store sequences on the same chain.
   14777       // Keep trying to merge store sequences until we are unable to do so
   14778       // or until we merge the last store on the chain.
   14779       bool Changed = MergeConsecutiveStores(ST);
   14780       if (!Changed) break;
    14781       // Return N, as the merge only uses CombineTo and no worklist
    14782       // cleanup is necessary.
   14783       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
   14784         return SDValue(N, 0);
   14785     }
   14786   }
   14787 
   14788   // Try transforming N to an indexed store.
   14789   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
   14790     return SDValue(N, 0);
   14791 
   14792   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
   14793   //
   14794   // Make sure to do this only after attempting to merge stores in order to
   14795   //  avoid changing the types of some subset of stores due to visit order,
   14796   //  preventing their merging.
   14797   if (isa<ConstantFPSDNode>(ST->getValue())) {
   14798     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
   14799       return NewSt;
   14800   }
   14801 
   14802   if (SDValue NewSt = splitMergedValStore(ST))
   14803     return NewSt;
   14804 
   14805   return ReduceLoadOpStoreWidth(N);
   14806 }
   14807 
   14808 /// For the instruction sequence of store below, F and I values
    14809 /// For the store instruction sequence below, the F and I values
    14810 /// are bundled together as an i64 value before being stored into memory.
    14811 /// Sometimes it is more efficient to generate separate stores for F and I,
   14812 ///
   14813 ///   (store (or (zext (bitcast F to i32) to i64),
   14814 ///              (shl (zext I to i64), 32)), addr)  -->
   14815 ///   (store F, addr) and (store I, addr+4)
   14816 ///
    14817 /// Similarly, splitting other merged stores can also be beneficial, e.g.:
   14818 /// For pair of {i32, i32}, i64 store --> two i32 stores.
   14819 /// For pair of {i32, i16}, i64 store --> two i32 stores.
   14820 /// For pair of {i16, i16}, i32 store --> two i16 stores.
   14821 /// For pair of {i16, i8},  i32 store --> two i16 stores.
   14822 /// For pair of {i8, i8},   i16 store --> two i8 stores.
   14823 ///
   14824 /// We allow each target to determine specifically which kind of splitting is
   14825 /// supported.
   14826 ///
    14827 /// These store patterns are commonly seen in code like the snippet below
    14828 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
   14829 ///   void goo(const std::pair<int, float> &);
   14830 ///   hoo() {
   14831 ///     ...
   14832 ///     goo(std::make_pair(tmp, ftmp));
   14833 ///     ...
   14834 ///   }
   14835 ///
   14836 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
   14837   if (OptLevel == CodeGenOpt::None)
   14838     return SDValue();
   14839 
   14840   SDValue Val = ST->getValue();
   14841   SDLoc DL(ST);
   14842 
   14843   // Match OR operand.
   14844   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
   14845     return SDValue();
   14846 
   14847   // Match SHL operand and get Lower and Higher parts of Val.
   14848   SDValue Op1 = Val.getOperand(0);
   14849   SDValue Op2 = Val.getOperand(1);
   14850   SDValue Lo, Hi;
   14851   if (Op1.getOpcode() != ISD::SHL) {
   14852     std::swap(Op1, Op2);
   14853     if (Op1.getOpcode() != ISD::SHL)
   14854       return SDValue();
   14855   }
   14856   Lo = Op2;
   14857   Hi = Op1.getOperand(0);
   14858   if (!Op1.hasOneUse())
   14859     return SDValue();
   14860 
   14861   // Match shift amount to HalfValBitSize.
   14862   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
   14863   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
   14864   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
   14865     return SDValue();
   14866 
    14867   // Lo and Hi must be zero-extended from scalar integers no wider than
    14868   // HalfValBitSize.
   14869   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
   14870       !Lo.getOperand(0).getValueType().isScalarInteger() ||
   14871       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
   14872       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
   14873       !Hi.getOperand(0).getValueType().isScalarInteger() ||
   14874       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
   14875     return SDValue();
   14876 
   14877   // Use the EVT of low and high parts before bitcast as the input
   14878   // of target query.
   14879   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
   14880                   ? Lo.getOperand(0).getValueType()
   14881                   : Lo.getValueType();
   14882   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
   14883                    ? Hi.getOperand(0).getValueType()
   14884                    : Hi.getValueType();
   14885   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
   14886     return SDValue();
   14887 
   14888   // Start to split store.
   14889   unsigned Alignment = ST->getAlignment();
   14890   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
   14891   AAMDNodes AAInfo = ST->getAAInfo();
   14892 
   14893   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
   14894   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
   14895   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
   14896   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
   14897 
   14898   SDValue Chain = ST->getChain();
   14899   SDValue Ptr = ST->getBasePtr();
   14900   // Lower value store.
   14901   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
   14902                              ST->getAlignment(), MMOFlags, AAInfo);
   14903   Ptr =
   14904       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
   14905                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
   14906   // Higher value store.
   14907   SDValue St1 =
   14908       DAG.getStore(St0, DL, Hi, Ptr,
   14909                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
   14910                    Alignment / 2, MMOFlags, AAInfo);
   14911   return St1;
   14912 }
   14913 
   14914 /// Convert a disguised subvector insertion into a shuffle:
   14915 /// insert_vector_elt V, (bitcast X from vector type), IdxC -->
   14916 /// bitcast(shuffle (bitcast V), (extended X), Mask)
   14917 /// Note: We do not use an insert_subvector node because that requires a legal
   14918 /// subvector type.
   14919 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
   14920   SDValue InsertVal = N->getOperand(1);
   14921   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
   14922       !InsertVal.getOperand(0).getValueType().isVector())
   14923     return SDValue();
   14924 
   14925   SDValue SubVec = InsertVal.getOperand(0);
   14926   SDValue DestVec = N->getOperand(0);
   14927   EVT SubVecVT = SubVec.getValueType();
   14928   EVT VT = DestVec.getValueType();
   14929   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
   14930   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
   14931   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
   14932 
   14933   // Step 1: Create a shuffle mask that implements this insert operation. The
   14934   // vector that we are inserting into will be operand 0 of the shuffle, so
   14935   // those elements are just 'i'. The inserted subvector is in the first
   14936   // positions of operand 1 of the shuffle. Example:
   14937   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
   14938   SmallVector<int, 16> Mask(NumMaskVals);
   14939   for (unsigned i = 0; i != NumMaskVals; ++i) {
   14940     if (i / NumSrcElts == InsIndex)
   14941       Mask[i] = (i % NumSrcElts) + NumMaskVals;
   14942     else
   14943       Mask[i] = i;
   14944   }
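  // Illustrative trace for the example above (NumSrcElts = 2, ExtendRatio = 4,
  // NumMaskVals = 8, InsIndex = 2): only i = 4 and i = 5 satisfy
  // i / NumSrcElts == InsIndex, so they pick elements 8 and 9 of operand 1,
  // giving the mask {0,1,2,3,8,9,6,7}.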
   14945 
    14946   // Bail out if the target cannot handle the shuffle we want to create.
   14947   EVT SubVecEltVT = SubVecVT.getVectorElementType();
   14948   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
   14949   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
   14950     return SDValue();
   14951 
   14952   // Step 2: Create a wide vector from the inserted source vector by appending
   14953   // undefined elements. This is the same size as our destination vector.
   14954   SDLoc DL(N);
   14955   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
   14956   ConcatOps[0] = SubVec;
   14957   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
   14958 
   14959   // Step 3: Shuffle in the padded subvector.
   14960   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
   14961   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
   14962   AddToWorklist(PaddedSubV.getNode());
   14963   AddToWorklist(DestVecBC.getNode());
   14964   AddToWorklist(Shuf.getNode());
   14965   return DAG.getBitcast(VT, Shuf);
   14966 }
   14967 
   14968 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
   14969   SDValue InVec = N->getOperand(0);
   14970   SDValue InVal = N->getOperand(1);
   14971   SDValue EltNo = N->getOperand(2);
   14972   SDLoc DL(N);
   14973 
   14974   // If the inserted element is an UNDEF, just use the input vector.
   14975   if (InVal.isUndef())
   14976     return InVec;
   14977 
   14978   EVT VT = InVec.getValueType();
   14979 
   14980   // Remove redundant insertions:
   14981   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
   14982   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
   14983       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
   14984     return InVec;
   14985 
   14986   // We must know which element is being inserted for folds below here.
   14987   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
   14988   if (!IndexC)
   14989     return SDValue();
   14990   unsigned Elt = IndexC->getZExtValue();
   14991 
   14992   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
   14993     return Shuf;
   14994 
   14995   // Canonicalize insert_vector_elt dag nodes.
   14996   // Example:
   14997   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
   14998   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
   14999   //
   15000   // Do this only if the child insert_vector node has one use; also
   15001   // do this only if indices are both constants and Idx1 < Idx0.
   15002   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
   15003       && isa<ConstantSDNode>(InVec.getOperand(2))) {
   15004     unsigned OtherElt = InVec.getConstantOperandVal(2);
   15005     if (Elt < OtherElt) {
   15006       // Swap nodes.
   15007       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
   15008                                   InVec.getOperand(0), InVal, EltNo);
   15009       AddToWorklist(NewOp.getNode());
   15010       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
   15011                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
   15012     }
   15013   }
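  // E.g. (insert_vector_elt (insert_vector_elt A, x, 2), y, 0) is rewritten
  // as (insert_vector_elt (insert_vector_elt A, y, 0), x, 2), so the insert
  // with the larger constant index ends up outermost.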
   15014 
   15015   // If we can't generate a legal BUILD_VECTOR, exit
   15016   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
   15017     return SDValue();
   15018 
   15019   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
   15020   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
   15021   // vector elements.
   15022   SmallVector<SDValue, 8> Ops;
   15023   // Do not combine these two vectors if the output vector will not replace
   15024   // the input vector.
   15025   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
   15026     Ops.append(InVec.getNode()->op_begin(),
   15027                InVec.getNode()->op_end());
   15028   } else if (InVec.isUndef()) {
   15029     unsigned NElts = VT.getVectorNumElements();
   15030     Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
   15031   } else {
   15032     return SDValue();
   15033   }
   15034 
   15035   // Insert the element
   15036   if (Elt < Ops.size()) {
   15037     // All the operands of BUILD_VECTOR must have the same type;
   15038     // we enforce that here.
   15039     EVT OpVT = Ops[0].getValueType();
   15040     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
   15041   }
   15042 
   15043   // Return the new vector
   15044   return DAG.getBuildVector(VT, DL, Ops);
   15045 }
   15046 
   15047 SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
   15048     SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
   15049   assert(!OriginalLoad->isVolatile());
   15050 
   15051   EVT ResultVT = EVE->getValueType(0);
   15052   EVT VecEltVT = InVecVT.getVectorElementType();
   15053   unsigned Align = OriginalLoad->getAlignment();
   15054   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
   15055       VecEltVT.getTypeForEVT(*DAG.getContext()));
   15056 
   15057   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
   15058     return SDValue();
   15059 
   15060   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
   15061     ISD::NON_EXTLOAD : ISD::EXTLOAD;
   15062   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
   15063     return SDValue();
   15064 
   15065   Align = NewAlign;
   15066 
   15067   SDValue NewPtr = OriginalLoad->getBasePtr();
   15068   SDValue Offset;
   15069   EVT PtrType = NewPtr.getValueType();
   15070   MachinePointerInfo MPI;
   15071   SDLoc DL(EVE);
   15072   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
   15073     int Elt = ConstEltNo->getZExtValue();
   15074     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
   15075     Offset = DAG.getConstant(PtrOff, DL, PtrType);
   15076     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
   15077   } else {
   15078     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
   15079     Offset = DAG.getNode(
   15080         ISD::MUL, DL, PtrType, Offset,
   15081         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
   15082     MPI = OriginalLoad->getPointerInfo();
   15083   }
   15084   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
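  // E.g. extracting a variable element %i from a loaded v4f32 gives
  // NewPtr = BasePtr + %i * 4, where 4 is the element's store size in bytes.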
   15085 
   15086   // The replacement we need to do here is a little tricky: we need to
   15087   // replace an extractelement of a load with a load.
   15088   // Use ReplaceAllUsesOfValuesWith to do the replacement.
    15089   // Note that this replacement assumes that the extractelement is the only
   15090   // use of the load; that's okay because we don't want to perform this
   15091   // transformation in other cases anyway.
   15092   SDValue Load;
   15093   SDValue Chain;
   15094   if (ResultVT.bitsGT(VecEltVT)) {
   15095     // If the result type of vextract is wider than the load, then issue an
   15096     // extending load instead.
   15097     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
   15098                                                   VecEltVT)
   15099                                    ? ISD::ZEXTLOAD
   15100                                    : ISD::EXTLOAD;
   15101     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
   15102                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
   15103                           Align, OriginalLoad->getMemOperand()->getFlags(),
   15104                           OriginalLoad->getAAInfo());
   15105     Chain = Load.getValue(1);
   15106   } else {
   15107     Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
   15108                        MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
   15109                        OriginalLoad->getAAInfo());
   15110     Chain = Load.getValue(1);
   15111     if (ResultVT.bitsLT(VecEltVT))
   15112       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
   15113     else
   15114       Load = DAG.getBitcast(ResultVT, Load);
   15115   }
   15116   WorklistRemover DeadNodes(*this);
   15117   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
   15118   SDValue To[] = { Load, Chain };
   15119   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
   15120   // Since we're explicitly calling ReplaceAllUses, add the new node to the
   15121   // worklist explicitly as well.
   15122   AddToWorklist(Load.getNode());
   15123   AddUsersToWorklist(Load.getNode()); // Add users too
   15124   // Make sure to revisit this node to clean it up; it will usually be dead.
   15125   AddToWorklist(EVE);
   15126   ++OpsNarrowed;
   15127   return SDValue(EVE, 0);
   15128 }
   15129 
   15130 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
   15131   // (vextract (scalar_to_vector val, 0) -> val
   15132   SDValue InVec = N->getOperand(0);
   15133   EVT VT = InVec.getValueType();
   15134   EVT NVT = N->getValueType(0);
   15135 
   15136   if (InVec.isUndef())
   15137     return DAG.getUNDEF(NVT);
   15138 
   15139   if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
   15140     // Check if the result type doesn't match the inserted element type. A
   15141     // SCALAR_TO_VECTOR may truncate the inserted element and the
   15142     // EXTRACT_VECTOR_ELT may widen the extracted vector.
   15143     SDValue InOp = InVec.getOperand(0);
   15144     if (InOp.getValueType() != NVT) {
   15145       assert(InOp.getValueType().isInteger() && NVT.isInteger());
   15146       return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
   15147     }
   15148     return InOp;
   15149   }
   15150 
   15151   SDValue EltNo = N->getOperand(1);
   15152   ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
   15153 
   15154   // extract_vector_elt of out-of-bounds element -> UNDEF
   15155   if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements()))
   15156     return DAG.getUNDEF(NVT);
   15157 
   15158   // extract_vector_elt (build_vector x, y), 1 -> y
   15159   if (ConstEltNo &&
   15160       InVec.getOpcode() == ISD::BUILD_VECTOR &&
   15161       TLI.isTypeLegal(VT) &&
   15162       (InVec.hasOneUse() ||
   15163        TLI.aggressivelyPreferBuildVectorSources(VT))) {
   15164     SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
   15165     EVT InEltVT = Elt.getValueType();
   15166 
    15167     // Sometimes a build_vector's scalar input types do not match the result type.
   15168     if (NVT == InEltVT)
   15169       return Elt;
   15170 
    15171     // TODO: It may be useful to truncate if the truncation is free and the
    15172     // build_vector implicitly converts.
   15173   }
   15174 
   15175   // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x)
   15176   bool isLE = DAG.getDataLayout().isLittleEndian();
   15177   unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1;
   15178   if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
   15179       ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) {
   15180     SDValue BCSrc = InVec.getOperand(0);
   15181     if (BCSrc.getValueType().isScalarInteger())
   15182       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
   15183   }
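  // E.g. on a little-endian target, element 0 of (v2i32 (bitcast i64 %x)) is
  // the low half of %x, so the extract becomes (i32 (truncate %x));
  // big-endian targets must extract element 1 instead, hence EltTrunc above.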
   15184 
   15185   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
   15186   //
   15187   // This only really matters if the index is non-constant since other combines
   15188   // on the constant elements already work.
   15189   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
   15190       EltNo == InVec.getOperand(2)) {
   15191     SDValue Elt = InVec.getOperand(1);
   15192     return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
   15193   }
   15194 
   15195   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
   15196   // We only perform this optimization before the op legalization phase because
   15197   // we may introduce new vector instructions which are not backed by TD
    15198   // patterns. For example, on AVX we could end up extracting elements from a
    15199   // wide vector without using extract_subvector. However, if we can find an
    15200   // underlying scalar value, then we can always use that.
   15201   if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
   15202     int NumElem = VT.getVectorNumElements();
   15203     ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
   15204     // Find the new index to extract from.
   15205     int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
   15206 
   15207     // Extracting an undef index is undef.
   15208     if (OrigElt == -1)
   15209       return DAG.getUNDEF(NVT);
   15210 
   15211     // Select the right vector half to extract from.
   15212     SDValue SVInVec;
   15213     if (OrigElt < NumElem) {
   15214       SVInVec = InVec->getOperand(0);
   15215     } else {
   15216       SVInVec = InVec->getOperand(1);
   15217       OrigElt -= NumElem;
   15218     }
   15219 
   15220     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
   15221       SDValue InOp = SVInVec.getOperand(OrigElt);
   15222       if (InOp.getValueType() != NVT) {
   15223         assert(InOp.getValueType().isInteger() && NVT.isInteger());
   15224         InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
   15225       }
   15226 
   15227       return InOp;
   15228     }
   15229 
   15230     // FIXME: We should handle recursing on other vector shuffles and
   15231     // scalar_to_vector here as well.
   15232 
   15233     if (!LegalOperations ||
   15234         // FIXME: Should really be just isOperationLegalOrCustom.
   15235         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) ||
   15236         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) {
   15237       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
   15238       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
   15239                          DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
   15240     }
   15241   }
   15242 
   15243   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
   15244   // simplify it based on the (valid) extraction indices.
   15245   if (llvm::all_of(InVec->uses(), [&](SDNode *Use) {
   15246         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
   15247                Use->getOperand(0) == InVec &&
   15248                isa<ConstantSDNode>(Use->getOperand(1));
   15249       })) {
   15250     APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements());
   15251     for (SDNode *Use : InVec->uses()) {
   15252       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
   15253       if (CstElt->getAPIntValue().ult(VT.getVectorNumElements()))
   15254         DemandedElts.setBit(CstElt->getZExtValue());
   15255     }
   15256     if (SimplifyDemandedVectorElts(InVec, DemandedElts, true))
   15257       return SDValue(N, 0);
   15258   }
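  // E.g. if the only users extract constant indices 0 and 2 from a v4i32
  // source, DemandedElts is 0b0101 and the other two lanes can be simplified
  // away.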
   15259 
   15260   bool BCNumEltsChanged = false;
   15261   EVT ExtVT = VT.getVectorElementType();
   15262   EVT LVT = ExtVT;
   15263 
    15264   // If the result of the load has to be truncated, then it's not necessarily
   15265   // profitable.
   15266   if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
   15267     return SDValue();
   15268 
   15269   if (InVec.getOpcode() == ISD::BITCAST) {
   15270     // Don't duplicate a load with other uses.
   15271     if (!InVec.hasOneUse())
   15272       return SDValue();
   15273 
   15274     EVT BCVT = InVec.getOperand(0).getValueType();
   15275     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
   15276       return SDValue();
   15277     if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
   15278       BCNumEltsChanged = true;
   15279     InVec = InVec.getOperand(0);
   15280     ExtVT = BCVT.getVectorElementType();
   15281   }
   15282 
   15283   // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
   15284   if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
   15285       ISD::isNormalLoad(InVec.getNode()) &&
   15286       !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
   15287     SDValue Index = N->getOperand(1);
   15288     if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
   15289       if (!OrigLoad->isVolatile()) {
   15290         return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
   15291                                                              OrigLoad);
   15292       }
   15293     }
   15294   }
   15295 
   15296   // Perform only after legalization to ensure build_vector / vector_shuffle
   15297   // optimizations have already been done.
   15298   if (!LegalOperations) return SDValue();
   15299 
   15300   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
   15301   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
   15302   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
   15303 
   15304   if (ConstEltNo) {
   15305     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
   15306 
   15307     LoadSDNode *LN0 = nullptr;
   15308     const ShuffleVectorSDNode *SVN = nullptr;
   15309     if (ISD::isNormalLoad(InVec.getNode())) {
   15310       LN0 = cast<LoadSDNode>(InVec);
   15311     } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
   15312                InVec.getOperand(0).getValueType() == ExtVT &&
   15313                ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
   15314       // Don't duplicate a load with other uses.
   15315       if (!InVec.hasOneUse())
   15316         return SDValue();
   15317 
   15318       LN0 = cast<LoadSDNode>(InVec.getOperand(0));
   15319     } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
   15320       // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
   15321       // =>
   15322       // (load $addr+1*size)
   15323 
   15324       // Don't duplicate a load with other uses.
   15325       if (!InVec.hasOneUse())
   15326         return SDValue();
   15327 
   15328       // If the bit convert changed the number of elements, it is unsafe
   15329       // to examine the mask.
   15330       if (BCNumEltsChanged)
   15331         return SDValue();
   15332 
    15333       // Select the input vector, guarding against an out-of-range extract index.
   15334       unsigned NumElems = VT.getVectorNumElements();
   15335       int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
   15336       InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
   15337 
   15338       if (InVec.getOpcode() == ISD::BITCAST) {
   15339         // Don't duplicate a load with other uses.
   15340         if (!InVec.hasOneUse())
   15341           return SDValue();
   15342 
   15343         InVec = InVec.getOperand(0);
   15344       }
   15345       if (ISD::isNormalLoad(InVec.getNode())) {
   15346         LN0 = cast<LoadSDNode>(InVec);
   15347         Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
   15348         EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
   15349       }
   15350     }
   15351 
   15352     // Make sure we found a non-volatile load and the extractelement is
   15353     // the only use.
   15354     if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
   15355       return SDValue();
   15356 
   15357     // If Idx was -1 above, Elt is going to be -1, so just return undef.
   15358     if (Elt == -1)
   15359       return DAG.getUNDEF(LVT);
   15360 
   15361     return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
   15362   }
   15363 
   15364   return SDValue();
   15365 }
   15366 
   15367 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
   15368 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
   15369   // We perform this optimization post type-legalization because
   15370   // the type-legalizer often scalarizes integer-promoted vectors.
    15371   // Performing this optimization earlier may create bit-casts which
    15372   // will be type-legalized into complex code sequences.
   15373   // We perform this optimization only before the operation legalizer because we
   15374   // may introduce illegal operations.
   15375   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
   15376     return SDValue();
   15377 
   15378   unsigned NumInScalars = N->getNumOperands();
   15379   SDLoc DL(N);
   15380   EVT VT = N->getValueType(0);
   15381 
   15382   // Check to see if this is a BUILD_VECTOR of a bunch of values
   15383   // which come from any_extend or zero_extend nodes. If so, we can create
   15384   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
   15385   // optimizations. We do not handle sign-extend because we can't fill the sign
   15386   // using shuffles.
   15387   EVT SourceType = MVT::Other;
   15388   bool AllAnyExt = true;
   15389 
   15390   for (unsigned i = 0; i != NumInScalars; ++i) {
   15391     SDValue In = N->getOperand(i);
   15392     // Ignore undef inputs.
   15393     if (In.isUndef()) continue;
   15394 
   15395     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
   15396     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
   15397 
   15398     // Abort if the element is not an extension.
   15399     if (!ZeroExt && !AnyExt) {
   15400       SourceType = MVT::Other;
   15401       break;
   15402     }
   15403 
   15404     // The input is a ZeroExt or AnyExt. Check the original type.
   15405     EVT InTy = In.getOperand(0).getValueType();
   15406 
   15407     // Check that all of the widened source types are the same.
   15408     if (SourceType == MVT::Other)
   15409       // First time.
   15410       SourceType = InTy;
   15411     else if (InTy != SourceType) {
    15412       // Multiple input types. Abort.
   15413       SourceType = MVT::Other;
   15414       break;
   15415     }
   15416 
   15417     // Check if all of the extends are ANY_EXTENDs.
   15418     AllAnyExt &= AnyExt;
   15419   }
   15420 
   15421   // In order to have valid types, all of the inputs must be extended from the
   15422   // same source type and all of the inputs must be any or zero extend.
   15423   // Scalar sizes must be a power of two.
   15424   EVT OutScalarTy = VT.getScalarType();
   15425   bool ValidTypes = SourceType != MVT::Other &&
   15426                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
   15427                  isPowerOf2_32(SourceType.getSizeInBits());
   15428 
   15429   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
   15430   // turn into a single shuffle instruction.
   15431   if (!ValidTypes)
   15432     return SDValue();
   15433 
   15434   bool isLE = DAG.getDataLayout().isLittleEndian();
   15435   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
   15436   assert(ElemRatio > 1 && "Invalid element size ratio");
   15437   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
   15438                                DAG.getConstant(0, DL, SourceType);
   15439 
   15440   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
   15441   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
   15442 
   15443   // Populate the new build_vector
   15444   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
   15445     SDValue Cast = N->getOperand(i);
   15446     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
   15447             Cast.getOpcode() == ISD::ZERO_EXTEND ||
   15448             Cast.isUndef()) && "Invalid cast opcode");
   15449     SDValue In;
   15450     if (Cast.isUndef())
   15451       In = DAG.getUNDEF(SourceType);
   15452     else
   15453       In = Cast->getOperand(0);
   15454     unsigned Index = isLE ? (i * ElemRatio) :
   15455                             (i * ElemRatio + (ElemRatio - 1));
   15456 
   15457     assert(Index < Ops.size() && "Invalid index");
   15458     Ops[Index] = In;
   15459   }
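  // Illustrative placement with ElemRatio = 4 (e.g. a v2i32 result built from
  // i8 sources): input i lands in slot i * 4 on little-endian targets and in
  // slot i * 4 + 3 on big-endian targets, i.e. in the lane holding the
  // least-significant bits of wide element i.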
   15460 
   15461   // The type of the new BUILD_VECTOR node.
   15462   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
   15463   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
   15464          "Invalid vector size");
   15465   // Check if the new vector type is legal.
   15466   if (!isTypeLegal(VecVT) ||
   15467       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
   15468        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
   15469     return SDValue();
   15470 
   15471   // Make the new BUILD_VECTOR.
   15472   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
   15473 
   15474   // The new BUILD_VECTOR node has the potential to be further optimized.
   15475   AddToWorklist(BV.getNode());
   15476   // Bitcast to the desired type.
   15477   return DAG.getBitcast(VT, BV);
   15478 }
   15479 
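// Simplify (build_vec (uint_to_fp x), (uint_to_fp y), ...) to
// (uint_to_fp (build_vec x, y, ...)), and likewise for sint_to_fp, when all
// defined operands share one integer source type and the target supports the
// vector conversion.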
   15480 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
   15481   EVT VT = N->getValueType(0);
   15482 
   15483   unsigned NumInScalars = N->getNumOperands();
   15484   SDLoc DL(N);
   15485 
   15486   EVT SrcVT = MVT::Other;
   15487   unsigned Opcode = ISD::DELETED_NODE;
   15488   unsigned NumDefs = 0;
   15489 
   15490   for (unsigned i = 0; i != NumInScalars; ++i) {
   15491     SDValue In = N->getOperand(i);
   15492     unsigned Opc = In.getOpcode();
   15493 
   15494     if (Opc == ISD::UNDEF)
   15495       continue;
   15496 
    15497     // All scalar values must be floats that were converted from integers.
   15498     if (Opcode == ISD::DELETED_NODE &&
   15499         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
   15500       Opcode = Opc;
   15501     }
   15502 
   15503     if (Opc != Opcode)
   15504       return SDValue();
   15505 
   15506     EVT InVT = In.getOperand(0).getValueType();
   15507 
    15508     // If the scalar values have different types, bail out. This restriction
    15509     // is chosen to keep BUILD_VECTOR of integer types simple.
   15510     if (SrcVT == MVT::Other)
   15511       SrcVT = InVT;
   15512     if (SrcVT != InVT)
   15513       return SDValue();
   15514     NumDefs++;
   15515   }
   15516 
    15517   // If the vector has just one element defined, it's not worth folding it
    15518   // into a vectorized one.
   15519   if (NumDefs < 2)
   15520     return SDValue();
   15521 
   15522   assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
   15523          && "Should only handle conversion from integer to float.");
   15524   assert(SrcVT != MVT::Other && "Cannot determine source type!");
   15525 
   15526   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
   15527 
   15528   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
   15529     return SDValue();
   15530 
   15531   // Just because the floating-point vector type is legal does not necessarily
   15532   // mean that the corresponding integer vector type is.
   15533   if (!isTypeLegal(NVT))
   15534     return SDValue();
   15535 
   15536   SmallVector<SDValue, 8> Opnds;
   15537   for (unsigned i = 0; i != NumInScalars; ++i) {
   15538     SDValue In = N->getOperand(i);
   15539 
   15540     if (In.isUndef())
   15541       Opnds.push_back(DAG.getUNDEF(SrcVT));
   15542     else
   15543       Opnds.push_back(In.getOperand(0));
   15544   }
   15545   SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
   15546   AddToWorklist(BV.getNode());
   15547 
   15548   return DAG.getNode(Opcode, DL, VT, BV);
   15549 }
   15550 
   15551 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
   15552                                            ArrayRef<int> VectorMask,
   15553                                            SDValue VecIn1, SDValue VecIn2,
   15554                                            unsigned LeftIdx) {
   15555   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
   15556   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
   15557 
   15558   EVT VT = N->getValueType(0);
   15559   EVT InVT1 = VecIn1.getValueType();
   15560   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
   15561 
   15562   unsigned Vec2Offset = 0;
   15563   unsigned NumElems = VT.getVectorNumElements();
   15564   unsigned ShuffleNumElems = NumElems;
   15565 
    15566   // If both input vectors are extracted from the same base vector, we do
    15567   // not need the extra addend (Vec2Offset) when computing the shuffle
    15568   // mask.
   15569   if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
   15570       !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
   15571       !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
   15572     Vec2Offset = InVT1.getVectorNumElements();
   15573 
   15574   // We can't generate a shuffle node with mismatched input and output types.
   15575   // Try to make the types match the type of the output.
   15576   if (InVT1 != VT || InVT2 != VT) {
   15577     if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
   15578       // If the output vector length is a multiple of both input lengths,
   15579       // we can concatenate them and pad the rest with undefs.
   15580       unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
   15581       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
   15582       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
   15583       ConcatOps[0] = VecIn1;
   15584       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
   15585       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
   15586       VecIn2 = SDValue();
   15587     } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
   15588       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
   15589         return SDValue();
   15590 
   15591       if (!VecIn2.getNode()) {
   15592         // If we only have one input vector, and it's twice the size of the
   15593         // output, split it in two.
   15594         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
   15595                              DAG.getConstant(NumElems, DL, IdxTy));
   15596         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
   15597         // Since we now have shorter input vectors, adjust the offset of the
   15598         // second vector's start.
   15599         Vec2Offset = NumElems;
   15600       } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
   15601         // VecIn1 is wider than the output, and we have another, possibly
   15602         // smaller input. Pad the smaller input with undefs, shuffle at the
   15603         // input vector width, and extract the output.
   15604         // The shuffle type is different than VT, so check legality again.
   15605         if (LegalOperations &&
   15606             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
   15607           return SDValue();
   15608 
   15609         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
   15610         // lower it back into a BUILD_VECTOR. So if the inserted type is
   15611         // illegal, don't even try.
   15612         if (InVT1 != InVT2) {
   15613           if (!TLI.isTypeLegal(InVT2))
   15614             return SDValue();
   15615           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
   15616                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
   15617         }
   15618         ShuffleNumElems = NumElems * 2;
   15619       } else {
   15620         // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
   15621         // than VecIn1. We can't handle this for now - this case will disappear
   15622         // when we start sorting the vectors by type.
   15623         return SDValue();
   15624       }
   15625     } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
   15626                InVT1.getSizeInBits() == VT.getSizeInBits()) {
   15627       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
   15628       ConcatOps[0] = VecIn2;
   15629       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
   15630     } else {
   15631       // TODO: Support cases where the length mismatch isn't exactly by a
   15632       // factor of 2.
   15633       // TODO: Move this check upwards, so that if we have bad type
   15634       // mismatches, we don't create any DAG nodes.
   15635       return SDValue();
   15636     }
   15637   }
   15638 
   15639   // Initialize mask to undef.
   15640   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
   15641 
   15642   // Only need to run up to the number of elements actually used, not the
   15643   // total number of elements in the shuffle - if we are shuffling a wider
   15644   // vector, the high lanes should be set to undef.
   15645   for (unsigned i = 0; i != NumElems; ++i) {
   15646     if (VectorMask[i] <= 0)
   15647       continue;
   15648 
   15649     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
   15650     if (VectorMask[i] == (int)LeftIdx) {
   15651       Mask[i] = ExtIndex;
   15652     } else if (VectorMask[i] == (int)LeftIdx + 1) {
   15653       Mask[i] = Vec2Offset + ExtIndex;
   15654     }
   15655   }
   15656 
   15657   // The types of the input vectors may have changed above.
   15658   InVT1 = VecIn1.getValueType();
   15659 
   15660   // If we already have a VecIn2, it should have the same type as VecIn1.
   15661   // If we don't, get an undef vector of the appropriate type.
   15662   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
   15663   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
   15664 
   15665   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
   15666   if (ShuffleNumElems > NumElems)
   15667     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
   15668 
   15669   return Shuffle;
   15670 }
   15671 
   15672 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
   15673 // operations. If the types of the vectors we're extracting from allow it,
   15674 // turn this into a vector_shuffle node.
   15675 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
   15676   SDLoc DL(N);
   15677   EVT VT = N->getValueType(0);
   15678 
   15679   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
   15680   if (!isTypeLegal(VT))
   15681     return SDValue();
   15682 
   15683   // After legalization, we may only combine to a shuffle if the shuffle is legal.
   15684   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
   15685     return SDValue();
   15686 
   15687   bool UsesZeroVector = false;
   15688   unsigned NumElems = N->getNumOperands();
   15689 
   15690   // Record, for each element of the newly built vector, which input vector
   15691   // that element comes from. -1 stands for undef, 0 for the zero vector,
   15692   // and positive values for the input vectors.
   15693   // VectorMask maps each element to its vector number, and VecIn maps vector
   15694   // numbers to their initial SDValues.
   15695 
   15696   SmallVector<int, 8> VectorMask(NumElems, -1);
   15697   SmallVector<SDValue, 8> VecIn;
   15698   VecIn.push_back(SDValue());
   15699 
   15700   for (unsigned i = 0; i != NumElems; ++i) {
   15701     SDValue Op = N->getOperand(i);
   15702 
   15703     if (Op.isUndef())
   15704       continue;
   15705 
   15706     // See if we can use a blend with a zero vector.
   15707     // TODO: Should we generalize this to a blend with an arbitrary constant
   15708     // vector?
   15709     if (isNullConstant(Op) || isNullFPConstant(Op)) {
   15710       UsesZeroVector = true;
   15711       VectorMask[i] = 0;
   15712       continue;
   15713     }
   15714 
   15715     // Not an undef or zero. If the input is something other than an
   15716     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
   15717     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
   15718         !isa<ConstantSDNode>(Op.getOperand(1)))
   15719       return SDValue();
   15720     SDValue ExtractedFromVec = Op.getOperand(0);
   15721 
   15722     APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
   15723     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
   15724       return SDValue();
   15725 
   15726     // All inputs must have the same element type as the output.
   15727     if (VT.getVectorElementType() !=
   15728         ExtractedFromVec.getValueType().getVectorElementType())
   15729       return SDValue();
   15730 
   15731     // Have we seen this input vector before?
   15732     // The list of source vectors is expected to be tiny (usually one or two
   15733     // entries), so a reverse map from SDValues to indices isn't worth it.
   15734     unsigned Idx = std::distance(
   15735         VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
   15736     if (Idx == VecIn.size())
   15737       VecIn.push_back(ExtractedFromVec);
   15738 
   15739     VectorMask[i] = Idx;
   15740   }
   15741 
   15742   // If we didn't find at least one input vector, bail out.
   15743   if (VecIn.size() < 2)
   15744     return SDValue();
   15745 
   15746   // If all operands of the BUILD_VECTOR extract from the same vector, try to
   15747   // split that vector efficiently based on the maximum vector access index,
   15748   // and adjust VectorMask and VecIn accordingly. (VecIn always holds a
   15749   // sentinel entry, so a size of 2 means exactly one real input vector.)
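           // E.g. (illustrative): a v4i32 BUILD_VECTOR reading elements {1, 9, 3, 12}
           // of a single v16i32 source has MaxIndex = 12, so NearestPow2 = 16 and
           // the source is split into two v8i32 halves.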
   15750   if (VecIn.size() == 2) {
   15751     unsigned MaxIndex = 0;
   15752     unsigned NearestPow2 = 0;
   15753     SDValue Vec = VecIn.back();
   15754     EVT InVT = Vec.getValueType();
   15755     MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
   15756     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
   15757 
   15758     for (unsigned i = 0; i < NumElems; i++) {
   15759       if (VectorMask[i] <= 0)
   15760         continue;
   15761       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
   15762       IndexVec[i] = Index;
   15763       MaxIndex = std::max(MaxIndex, Index);
   15764     }
   15765 
   15766     NearestPow2 = PowerOf2Ceil(MaxIndex);
   15767     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
   15768         NumElems * 2 < NearestPow2) {
   15769       unsigned SplitSize = NearestPow2 / 2;
   15770       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
   15771                                      InVT.getVectorElementType(), SplitSize);
   15772       if (TLI.isTypeLegal(SplitVT)) {
   15773         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
   15774                                      DAG.getConstant(SplitSize, DL, IdxTy));
   15775         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
   15776                                      DAG.getConstant(0, DL, IdxTy));
   15777         VecIn.pop_back();
   15778         VecIn.push_back(VecIn1);
   15779         VecIn.push_back(VecIn2);
   15780 
   15781         for (unsigned i = 0; i < NumElems; i++) {
   15782           if (VectorMask[i] <= 0)
   15783             continue;
   15784           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
   15785         }
   15786       }
   15787     }
   15788   }
   15789 
   15790   // TODO: We want to sort the vectors by descending length, so that adjacent
   15791   // pairs have similar length, and the longer vector is always first in the
   15792   // pair.
   15793 
   15794   // TODO: Should this fire if some of the input vectors have illegal types
   15795   // (as it does now), or should we let legalization run its course first?
   15796 
   15797   // Shuffle phase:
   15798   // Take pairs of vectors, and shuffle them so that the result has elements
   15799   // from these vectors in the correct places.
   15800   // For example, given:
   15801   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
   15802   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
   15803   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
   15804   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
   15805   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
   15806   // We will generate:
   15807   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
   15808   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
   15809   SmallVector<SDValue, 4> Shuffles;
   15810   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
   15811     unsigned LeftIdx = 2 * In + 1;
   15812     SDValue VecLeft = VecIn[LeftIdx];
   15813     SDValue VecRight =
   15814         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
   15815 
   15816     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
   15817                                                 VecRight, LeftIdx))
   15818       Shuffles.push_back(Shuffle);
   15819     else
   15820       return SDValue();
   15821   }
   15822 
   15823   // If we need the zero vector as an "ingredient" in the blend tree, add it
   15824   // to the list of shuffles.
   15825   if (UsesZeroVector)
   15826     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
   15827                                       : DAG.getConstantFP(0.0, DL, VT));
   15828 
   15829   // If we only have one shuffle, we're done.
   15830   if (Shuffles.size() == 1)
   15831     return Shuffles[0];
   15832 
   15833   // Update the vector mask to point to the post-shuffle vectors.
   15834   for (int &Vec : VectorMask)
   15835     if (Vec == 0)
   15836       Vec = Shuffles.size() - 1;
   15837     else
   15838       Vec = (Vec - 1) / 2;
   15839 
   15840   // More than one shuffle. Generate a binary tree of blends, e.g. if from
   15841   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
   15842   // generate:
   15843   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
   15844   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
   15845   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
   15846   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
   15847   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
   15848   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
   15849   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
   15850 
   15851   // Make sure the initial size of the shuffle list is even.
   15852   if (Shuffles.size() % 2)
   15853     Shuffles.push_back(DAG.getUNDEF(VT));
   15854 
   15855   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
   15856     if (CurSize % 2) {
   15857       Shuffles[CurSize] = DAG.getUNDEF(VT);
   15858       CurSize++;
   15859     }
   15860     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
   15861       int Left = 2 * In;
   15862       int Right = 2 * In + 1;
   15863       SmallVector<int, 8> Mask(NumElems, -1);
   15864       for (unsigned i = 0; i != NumElems; ++i) {
   15865         if (VectorMask[i] == Left) {
   15866           Mask[i] = i;
   15867           VectorMask[i] = In;
   15868         } else if (VectorMask[i] == Right) {
   15869           Mask[i] = i + NumElems;
   15870           VectorMask[i] = In;
   15871         }
   15872       }
   15873 
   15874       Shuffles[In] =
   15875           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
   15876     }
   15877   }
   15878   return Shuffles[0];
   15879 }
   15880 
   15881 // Try to turn a build vector of zero extends of extract vector elts into a
   15882 // vector zero extend and possibly an extract subvector.
   15883 // TODO: Support sign extend or any extend?
   15884 // TODO: Allow undef elements?
   15885 // TODO: Don't require the extracts to start at element 0.
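         // For example (illustrative):
         //   (v4i32 build_vector (zext (extract_elt X:v4i16, 0)), ...,
         //                       (zext (extract_elt X:v4i16, 3)))
         //     -> (v4i32 zero_extend (v4i16 extract_subvector X, 0))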
   15886 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
   15887   if (LegalOperations)
   15888     return SDValue();
   15889 
   15890   EVT VT = N->getValueType(0);
   15891 
   15892   SDValue Op0 = N->getOperand(0);
   15893   auto checkElem = [&](SDValue Op) -> int64_t {
   15894     if (Op.getOpcode() == ISD::ZERO_EXTEND &&
   15895         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
   15896         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
   15897       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
   15898         return C->getZExtValue();
   15899     return -1;
   15900   };
   15901 
   15902   // Make sure the first element matches
   15903   // (zext (extract_vector_elt X, C))
   15904   int64_t Offset = checkElem(Op0);
   15905   if (Offset < 0)
   15906     return SDValue();
   15907 
   15908   unsigned NumElems = N->getNumOperands();
   15909   SDValue In = Op0.getOperand(0).getOperand(0);
   15910   EVT InSVT = In.getValueType().getScalarType();
   15911   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
   15912 
   15913   // Don't create an illegal input type after type legalization.
   15914   if (LegalTypes && !TLI.isTypeLegal(InVT))
   15915     return SDValue();
   15916 
   15917   // Ensure all the elements come from the same vector and are adjacent.
   15918   for (unsigned i = 1; i != NumElems; ++i) {
   15919     if ((Offset + i) != checkElem(N->getOperand(i)))
   15920       return SDValue();
   15921   }
   15922 
   15923   SDLoc DL(N);
   15924   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
   15925                    Op0.getOperand(0).getOperand(1));
   15926   return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
   15927 }
   15928 
   15929 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
   15930   EVT VT = N->getValueType(0);
   15931 
   15932   // A vector built entirely of undefs is undef.
   15933   if (ISD::allOperandsUndef(N))
   15934     return DAG.getUNDEF(VT);
   15935 
   15936   // If this is a splat of a bitcast from another vector, change to a
   15937   // concat_vector.
   15938   // For example:
   15939   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
   15940   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
   15941   //
   15942   // If X is a build_vector itself, the concat can become a larger build_vector.
   15943   // TODO: Maybe this is useful for non-splat too?
   15944   if (!LegalOperations) {
   15945     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
   15946       Splat = peekThroughBitcast(Splat);
   15947       EVT SrcVT = Splat.getValueType();
   15948       if (SrcVT.isVector()) {
   15949         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
   15950         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
   15951                                      SrcVT.getVectorElementType(), NumElts);
   15952         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
   15953           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
   15954           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
   15955                                        NewVT, Ops);
   15956           return DAG.getBitcast(VT, Concat);
   15957         }
   15958       }
   15959     }
   15960   }
   15961 
   15962   // Check if we can express the BUILD_VECTOR as a subvector extract.
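           // E.g. (illustrative):
           //   (v2i32 build_vector (extract_elt V:v4i32, 2), (extract_elt V, 3))
           //     -> (v2i32 extract_subvector V, 2)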
   15963   if (!LegalTypes && (N->getNumOperands() > 1)) {
   15964     SDValue Op0 = N->getOperand(0);
   15965     auto checkElem = [&](SDValue Op) -> uint64_t {
   15966       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
   15967           (Op0.getOperand(0) == Op.getOperand(0)))
   15968         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
   15969           return CNode->getZExtValue();
   15970       return -1;
   15971     };
   15972 
   15973     int Offset = checkElem(Op0);
   15974     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
   15975       if (Offset + i != checkElem(N->getOperand(i))) {
   15976         Offset = -1;
   15977         break;
   15978       }
   15979     }
   15980 
   15981     if ((Offset == 0) &&
   15982         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
   15983       return Op0.getOperand(0);
   15984     if ((Offset != -1) &&
   15985         ((Offset % N->getValueType(0).getVectorNumElements()) ==
   15986          0)) // IDX must be multiple of output size.
   15987       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
   15988                          Op0.getOperand(0), Op0.getOperand(1));
   15989   }
   15990 
   15991   if (SDValue V = convertBuildVecZextToZext(N))
   15992     return V;
   15993 
   15994   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
   15995     return V;
   15996 
   15997   if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
   15998     return V;
   15999 
   16000   if (SDValue V = reduceBuildVecToShuffle(N))
   16001     return V;
   16002 
   16003   return SDValue();
   16004 }
   16005 
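         // Fold a CONCAT_VECTORS whose operands are all bitcasts from scalars (or
         // undef) into one BUILD_VECTOR of the scalars, bitcast to the result type.
         // E.g. (illustrative):
         //   concat_vectors (v2i32 bitcast f64:A), (v2i32 bitcast f64:B)
         //     -> (v4i32 bitcast (v2f64 build_vector A, B))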
   16006 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
   16007   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   16008   EVT OpVT = N->getOperand(0).getValueType();
   16009 
   16010   // If the operands are legal vectors, leave them alone.
   16011   if (TLI.isTypeLegal(OpVT))
   16012     return SDValue();
   16013 
   16014   SDLoc DL(N);
   16015   EVT VT = N->getValueType(0);
   16016   SmallVector<SDValue, 8> Ops;
   16017 
   16018   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
   16019   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
   16020 
   16021   // Keep track of what we encounter.
   16022   bool AnyInteger = false;
   16023   bool AnyFP = false;
   16024   for (const SDValue &Op : N->ops()) {
   16025     if (ISD::BITCAST == Op.getOpcode() &&
   16026         !Op.getOperand(0).getValueType().isVector())
   16027       Ops.push_back(Op.getOperand(0));
   16028     else if (ISD::UNDEF == Op.getOpcode())
   16029       Ops.push_back(ScalarUndef);
   16030     else
   16031       return SDValue();
   16032 
   16033     // Note whether we encounter an integer or floating point scalar.
   16034     // If it's neither, bail out; it could be something weird like x86mmx.
   16035     EVT LastOpVT = Ops.back().getValueType();
   16036     if (LastOpVT.isFloatingPoint())
   16037       AnyFP = true;
   16038     else if (LastOpVT.isInteger())
   16039       AnyInteger = true;
   16040     else
   16041       return SDValue();
   16042   }
   16043 
   16044   // If any of the operands is a floating point scalar bitcast to a vector,
   16045   // use floating point types throughout, and bitcast everything.
   16046   // Replace UNDEFs with a scalar UNDEF node of the final desired type.
   16047   if (AnyFP) {
   16048     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
   16049     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
   16050     if (AnyInteger) {
   16051       for (SDValue &Op : Ops) {
   16052         if (Op.getValueType() == SVT)
   16053           continue;
   16054         if (Op.isUndef())
   16055           Op = ScalarUndef;
   16056         else
   16057           Op = DAG.getBitcast(SVT, Op);
   16058       }
   16059     }
   16060   }
   16061 
   16062   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
   16063                                VT.getSizeInBits() / SVT.getSizeInBits());
   16064   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
   16065 }
   16066 
   16067 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
   16068 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
   16069 // most two distinct vectors of the same size as the result, attempt to turn
   16070 // this into a legal shuffle.
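         // E.g. (illustrative), with v2i32 operands extracted from v4i32 sources:
         //   concat_vectors (extract_subvector X, 0), (extract_subvector Y, 2)
         //     -> (v4i32 vector_shuffle<0,1,6,7> X, Y)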
   16071 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
   16072   EVT VT = N->getValueType(0);
   16073   EVT OpVT = N->getOperand(0).getValueType();
   16074   int NumElts = VT.getVectorNumElements();
   16075   int NumOpElts = OpVT.getVectorNumElements();
   16076 
   16077   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
   16078   SmallVector<int, 8> Mask;
   16079 
   16080   for (SDValue Op : N->ops()) {
   16081     // Peek through any bitcast.
   16082     Op = peekThroughBitcast(Op);
   16083 
   16084     // UNDEF nodes convert to UNDEF shuffle mask values.
   16085     if (Op.isUndef()) {
   16086       Mask.append((unsigned)NumOpElts, -1);
   16087       continue;
   16088     }
   16089 
   16090     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
   16091       return SDValue();
   16092 
   16093     // What vector are we extracting the subvector from and at what index?
   16094     SDValue ExtVec = Op.getOperand(0);
   16095 
   16096     // We want the EVT of the original extraction to correctly scale the
   16097     // extraction index.
   16098     EVT ExtVT = ExtVec.getValueType();
   16099 
   16100     // Peek through any bitcast.
   16101     ExtVec = peekThroughBitcast(ExtVec);
   16102 
   16103     // UNDEF nodes convert to UNDEF shuffle mask values.
   16104     if (ExtVec.isUndef()) {
   16105       Mask.append((unsigned)NumOpElts, -1);
   16106       continue;
   16107     }
   16108 
   16109     if (!isa<ConstantSDNode>(Op.getOperand(1)))
   16110       return SDValue();
   16111     int ExtIdx = Op.getConstantOperandVal(1);
   16112 
   16113     // Ensure that we are extracting a subvector from a vector the same
   16114     // size as the result.
   16115     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
   16116       return SDValue();
   16117 
   16118     // Scale the subvector index to account for any bitcast.
   16119     int NumExtElts = ExtVT.getVectorNumElements();
   16120     if (0 == (NumExtElts % NumElts))
   16121       ExtIdx /= (NumExtElts / NumElts);
   16122     else if (0 == (NumElts % NumExtElts))
   16123       ExtIdx *= (NumElts / NumExtElts);
   16124     else
   16125       return SDValue();
   16126 
   16127     // At most we can reference 2 inputs in the final shuffle.
   16128     if (SV0.isUndef() || SV0 == ExtVec) {
   16129       SV0 = ExtVec;
   16130       for (int i = 0; i != NumOpElts; ++i)
   16131         Mask.push_back(i + ExtIdx);
   16132     } else if (SV1.isUndef() || SV1 == ExtVec) {
   16133       SV1 = ExtVec;
   16134       for (int i = 0; i != NumOpElts; ++i)
   16135         Mask.push_back(i + ExtIdx + NumElts);
   16136     } else {
   16137       return SDValue();
   16138     }
   16139   }
   16140 
   16141   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
   16142     return SDValue();
   16143 
   16144   return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
   16145                               DAG.getBitcast(VT, SV1), Mask);
   16146 }
   16147 
   16148 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
   16149   // If we only have one input vector, we don't need to do any concatenation.
   16150   if (N->getNumOperands() == 1)
   16151     return N->getOperand(0);
   16152 
   16153   // Check if all of the operands are undefs.
   16154   EVT VT = N->getValueType(0);
   16155   if (ISD::allOperandsUndef(N))
   16156     return DAG.getUNDEF(VT);
   16157 
   16158   // Optimize concat_vectors where all but the first of the vectors are undef.
   16159   if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
   16160         return Op.isUndef();
   16161       })) {
   16162     SDValue In = N->getOperand(0);
   16163     assert(In.getValueType().isVector() && "Must concat vectors");
   16164 
   16165     // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(scalar).
   16166     if (In->getOpcode() == ISD::BITCAST &&
   16167         !In->getOperand(0).getValueType().isVector()) {
   16168       SDValue Scalar = In->getOperand(0);
   16169 
   16170       // If the bitcast type isn't legal, it might be a trunc of a legal type;
   16171       // look through the trunc so we can still do the transform:
   16172       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
   16173       if (Scalar->getOpcode() == ISD::TRUNCATE &&
   16174           !TLI.isTypeLegal(Scalar.getValueType()) &&
   16175           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
   16176         Scalar = Scalar->getOperand(0);
   16177 
   16178       EVT SclTy = Scalar->getValueType(0);
   16179 
   16180       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
   16181         return SDValue();
   16182 
   16183       // Bail out if the vector size is not a multiple of the scalar size.
   16184       if (VT.getSizeInBits() % SclTy.getSizeInBits())
   16185         return SDValue();
   16186 
   16187       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
   16188       if (VNTNumElms < 2)
   16189         return SDValue();
   16190 
   16191       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
   16192       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
   16193         return SDValue();
   16194 
   16195       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
   16196       return DAG.getBitcast(VT, Res);
   16197     }
   16198   }
   16199 
   16200   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
   16201   // We have already tested above for an UNDEF only concatenation.
   16202   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
   16203   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
   16204   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
   16205     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
   16206   };
   16207   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
   16208     SmallVector<SDValue, 8> Opnds;
   16209     EVT SVT = VT.getScalarType();
   16210 
   16211     EVT MinVT = SVT;
   16212     if (!SVT.isFloatingPoint()) {
   16213       // If the BUILD_VECTORs are built from integers, they may have different
   16214       // operand types. Get the smallest type and truncate all operands to it.
   16215       bool FoundMinVT = false;
   16216       for (const SDValue &Op : N->ops())
   16217         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
   16218           EVT OpSVT = Op.getOperand(0).getValueType();
   16219           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
   16220           FoundMinVT = true;
   16221         }
   16222       assert(FoundMinVT && "Concat vector type mismatch");
   16223     }
   16224 
   16225     for (const SDValue &Op : N->ops()) {
   16226       EVT OpVT = Op.getValueType();
   16227       unsigned NumElts = OpVT.getVectorNumElements();
   16228 
   16229       if (ISD::UNDEF == Op.getOpcode())
   16230         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
   16231 
   16232       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
   16233         if (SVT.isFloatingPoint()) {
   16234           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
   16235           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
   16236         } else {
   16237           for (unsigned i = 0; i != NumElts; ++i)
   16238             Opnds.push_back(
   16239                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
   16240         }
   16241       }
   16242     }
   16243 
   16244     assert(VT.getVectorNumElements() == Opnds.size() &&
   16245            "Concat vector type mismatch");
   16246     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
   16247   }
   16248 
   16249   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
   16250   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
   16251     return V;
   16252 
   16253   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
   16254   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
   16255     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
   16256       return V;
   16257 
   16258   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
   16259   // nodes often generate nop CONCAT_VECTORS nodes.
   16260   // Scan the CONCAT_VECTORS operands and look for concat operations that
   16261   // place the incoming vectors at the exact same location.
   16262   SDValue SingleSource = SDValue();
   16263   unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
   16264 
   16265   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
   16266     SDValue Op = N->getOperand(i);
   16267 
   16268     if (Op.isUndef())
   16269       continue;
   16270 
   16271     // Check if this is the identity extract:
   16272     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
   16273       return SDValue();
   16274 
   16275     // Find the single incoming vector for the extract_subvector.
   16276     if (SingleSource.getNode()) {
   16277       if (Op.getOperand(0) != SingleSource)
   16278         return SDValue();
   16279     } else {
   16280       SingleSource = Op.getOperand(0);
   16281 
   16282       // Check that the source type is the same as the type of the result.
   16283       // If not, this concat may extend the vector, so we cannot
   16284       // optimize it away.
   16285       if (SingleSource.getValueType() != N->getValueType(0))
   16286         return SDValue();
   16287     }
   16288 
   16289     unsigned IdentityIndex = i * PartNumElem;
   16290     ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
   16291     // The extract index must be constant.
   16292     if (!CS)
   16293       return SDValue();
   16294 
   16295     // Check that we are reading from the identity index.
   16296     if (CS->getZExtValue() != IdentityIndex)
   16297       return SDValue();
   16298   }
   16299 
   16300   if (SingleSource.getNode())
   16301     return SingleSource;
   16302 
   16303   return SDValue();
   16304 }
   16305 
   16306 /// If we are extracting a subvector produced by a wide binary operator with
   16307 /// at least one operand that was the result of a vector concatenation, then try
   16308 /// to use the narrow vector operands directly to avoid the concatenation and
   16309 /// extraction.
   16310 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
   16311   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
   16312   // some of these bailouts with other transforms.
   16313 
   16314   // The extract index must be a constant, so we can map it to a concat operand.
   16315   auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
   16316   if (!ExtractIndex)
   16317     return SDValue();
   16318 
   16319   // Only handle the case where we are doubling and then halving. A larger ratio
   16320   // may require more than two narrow binops to replace the wide binop.
   16321   EVT VT = Extract->getValueType(0);
   16322   unsigned NumElems = VT.getVectorNumElements();
   16323   assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
   16324          "Extract index is not a multiple of the vector length.");
   16325   if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
   16326     return SDValue();
   16327 
   16328   // We are looking for an optionally bitcasted wide vector binary operator
   16329   // feeding an extract subvector.
   16330   SDValue BinOp = peekThroughBitcast(Extract->getOperand(0));
   16331 
   16332   // TODO: The motivating case for this transform is an x86 AVX1 target. That
   16333   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
   16334   // flavors, but no other 256-bit integer support. This could be extended to
   16335   // handle any binop, but that may require fixing/adding other folds to avoid
   16336   // codegen regressions.
   16337   unsigned BOpcode = BinOp.getOpcode();
   16338   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
   16339     return SDValue();
   16340 
   16341   // The binop must produce a vector type, so we can chop it in half.
   16342   EVT WideBVT = BinOp.getValueType();
   16343   if (!WideBVT.isVector())
   16344     return SDValue();
   16345 
   16346   // Bail out if the target does not support a narrower version of the binop.
   16347   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
   16348                                    WideBVT.getVectorNumElements() / 2);
   16349   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   16350   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
   16351     return SDValue();
   16352 
   16353   // Peek through bitcasts of the binary operator operands if needed.
   16354   SDValue LHS = peekThroughBitcast(BinOp.getOperand(0));
   16355   SDValue RHS = peekThroughBitcast(BinOp.getOperand(1));
   16356 
   16357   // We need at least one concatenation operation of a binop operand to make
   16358   // this transform worthwhile. The concat must double the input vector sizes.
   16359   // TODO: Should we also handle INSERT_SUBVECTOR patterns?
   16360   bool ConcatL =
   16361       LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
   16362   bool ConcatR =
   16363       RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
   16364   if (!ConcatL && !ConcatR)
   16365     return SDValue();
   16366 
   16367   // If one of the binop operands was not the result of a concat, we must
   16368   // extract a half-sized operand for our new narrow binop. We can't just reuse
   16369   // the original extract index operand because we may have bitcasted.
   16370   unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
   16371   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
   16372   EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
   16373   SDLoc DL(Extract);
   16374 
   16375   // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
   16376   // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
   16377   // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
   16378   SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
   16379                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
   16380                                     BinOp.getOperand(0),
   16381                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
   16382 
   16383   SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
   16384                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
   16385                                     BinOp.getOperand(1),
   16386                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
   16387 
   16388   SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
   16389   return DAG.getBitcast(VT, NarrowBinOp);
   16390 }
   16391 
   16392 /// If we are extracting a subvector from a wide vector load, convert to a
   16393 /// narrow load to eliminate the extraction:
   16394 /// (extract_subvector (load wide vector)) --> (load narrow vector)
   16395 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
   16396   // TODO: Add support for big-endian. The offset calculation must be adjusted.
   16397   if (DAG.getDataLayout().isBigEndian())
   16398     return SDValue();
   16399 
   16400   // TODO: The one-use check is overly conservative. Check the cost of the
   16401   // extract instead or remove that condition entirely.
   16402   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
   16403   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
   16404   if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
   16405       !ExtIdx)
   16406     return SDValue();
   16407 
   16408   // The narrow load will be offset from the base address of the old load if
   16409   // we are extracting from something besides index 0 (little-endian).
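           // E.g. (illustrative): extracting the v2f64 at index 2 of a v8f64 load
           // gives Offset = 2 * 8 = 16 bytes from the original base address.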
   16410   EVT VT = Extract->getValueType(0);
   16411   SDLoc DL(Extract);
   16412   SDValue BaseAddr = Ld->getOperand(1);
   16413   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
   16414 
   16415   // TODO: Use "BaseIndexOffset" to make this more effective.
   16416   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
   16417   MachineFunction &MF = DAG.getMachineFunction();
   16418   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
   16419                                                    VT.getStoreSize());
   16420   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
   16421   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
   16422   return NewLd;
   16423 }
   16424 
   16425 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
   16426   EVT NVT = N->getValueType(0);
   16427   SDValue V = N->getOperand(0);
   16428 
   16429   // Extract from UNDEF is UNDEF.
   16430   if (V.isUndef())
   16431     return DAG.getUNDEF(NVT);
   16432 
   16433   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
   16434     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
   16435       return NarrowLoad;
   16436 
   16437   // Combine:
   16438   //    (extract_subvec (concat V1, V2, ...), i)
   16439   // Into:
   16440   //    Vi if possible
   16441   // Only operand 0 is checked, as 'concat' assumes all inputs have the same
   16442   // type.
   16443   if (V->getOpcode() == ISD::CONCAT_VECTORS &&
   16444       isa<ConstantSDNode>(N->getOperand(1)) &&
   16445       V->getOperand(0).getValueType() == NVT) {
   16446     unsigned Idx = N->getConstantOperandVal(1);
   16447     unsigned NumElems = NVT.getVectorNumElements();
   16448     assert((Idx % NumElems) == 0 &&
   16449            "IDX in concat is not a multiple of the result vector length.");
   16450     return V->getOperand(Idx / NumElems);
   16451   }
   16452 
   16453   // Skip bitcasting
   16454   V = peekThroughBitcast(V);
   16455 
   16456   // If the input is a build vector, try to make a smaller build vector.
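           // E.g. (illustrative): extracting the high v2i64 of
           // (v4i64 build_vector a, b, c, d) yields (v2i64 build_vector c, d).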
   16457   if (V->getOpcode() == ISD::BUILD_VECTOR) {
   16458     if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
   16459       EVT InVT = V->getValueType(0);
   16460       unsigned ExtractSize = NVT.getSizeInBits();
   16461       unsigned EltSize = InVT.getScalarSizeInBits();
   16462       // Only do this if we won't split any elements.
   16463       if (ExtractSize % EltSize == 0) {
   16464         unsigned NumElems = ExtractSize / EltSize;
   16465         EVT EltVT = InVT.getVectorElementType();
   16466         EVT ExtractVT = NumElems == 1 ? EltVT :
   16467           EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
   16468         if ((Level < AfterLegalizeDAG ||
   16469              (NumElems == 1 ||
   16470               TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
   16471             (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
   16472           unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
   16473                             EltSize;
   16474           if (NumElems == 1) {
   16475             SDValue Src = V->getOperand(IdxVal);
   16476             if (EltVT != Src.getValueType())
   16477               Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
   16478 
   16479             return DAG.getBitcast(NVT, Src);
   16480           }
   16481 
   16482           // Extract the pieces from the original build_vector.
   16483           SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
   16484                                             makeArrayRef(V->op_begin() + IdxVal,
   16485                                                          NumElems));
   16486           return DAG.getBitcast(NVT, BuildVec);
   16487         }
   16488       }
   16489     }
   16490   }
   16491 
   16492   if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
   16493     // Handle only the simple case where the vector being inserted and the
   16494     // vector being extracted are of the same size.
   16495     EVT SmallVT = V->getOperand(1).getValueType();
   16496     if (!NVT.bitsEq(SmallVT))
   16497       return SDValue();
   16498 
   16499     // Only handle cases where both indexes are constants.
   16500     ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
   16501     ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
   16502 
   16503     if (InsIdx && ExtIdx) {
   16504       // Combine:
   16505       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
   16506       // Into:
   16507       //    indices are equal or bit offsets are equal => V1
   16508       //    otherwise => (extract_subvec V1, ExtIdx)
   16509       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
   16510           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
   16511         return DAG.getBitcast(NVT, V->getOperand(1));
   16512       return DAG.getNode(
   16513           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
   16514           DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
   16515           N->getOperand(1));
   16516     }
   16517   }
   16518 
   16519   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
   16520     return NarrowBOp;
   16521 
   16522   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
   16523     return SDValue(N, 0);
   16524 
   16525   return SDValue();
   16526 }
   16527 
   16528 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, or to
   16529 // turn a shuffle of a single concat into a simpler shuffle followed by a concat.
   16530 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
   16531   EVT VT = N->getValueType(0);
   16532   unsigned NumElts = VT.getVectorNumElements();
   16533 
   16534   SDValue N0 = N->getOperand(0);
   16535   SDValue N1 = N->getOperand(1);
   16536   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
   16537 
   16538   SmallVector<SDValue, 4> Ops;
   16539   EVT ConcatVT = N0.getOperand(0).getValueType();
   16540   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
   16541   unsigned NumConcats = NumElts / NumElemsPerConcat;
   16542 
   16543   // Special case: shuffle(concat(A,B)) can be more efficiently represented
   16544   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
   16545   // half vector elements.
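           // E.g. (illustrative): shuffle<1,2,u,u> (v4i32 concat_vectors A:v2i32, B),
           // undef -> concat_vectors (v2i32 shuffle<1,2> A, B), undef.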
   16546   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
   16547       std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
   16548                   SVN->getMask().end(), [](int i) { return i == -1; })) {
   16549     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
   16550                               makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
   16551     N1 = DAG.getUNDEF(ConcatVT);
   16552     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
   16553   }
   16554 
   16555   // Look at every vector that's inserted. We're looking for exact
   16556   // subvector-sized copies from a concatenated vector.
   16557   for (unsigned I = 0; I != NumConcats; ++I) {
   16558     // Make sure we're dealing with a copy.
   16559     unsigned Begin = I * NumElemsPerConcat;
   16560     bool AllUndef = true, NoUndef = true;
   16561     for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
   16562       if (SVN->getMaskElt(J) >= 0)
   16563         AllUndef = false;
   16564       else
   16565         NoUndef = false;
   16566     }
   16567 
   16568     if (NoUndef) {
   16569       if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
   16570         return SDValue();
   16571 
   16572       for (unsigned J = 1; J != NumElemsPerConcat; ++J)
   16573         if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
   16574           return SDValue();
   16575 
   16576       unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
   16577       if (FirstElt < N0.getNumOperands())
   16578         Ops.push_back(N0.getOperand(FirstElt));
   16579       else
   16580         Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
   16581 
   16582     } else if (AllUndef) {
   16583       Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
   16584     } else { // Mixed with general masks and undefs, can't do optimization.
   16585       return SDValue();
   16586     }
   16587   }
   16588 
   16589   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
   16590 }
   16591 
   16592 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
   16593 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
   16594 //
   16595 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
   16596 // a simplification in some sense, but it isn't appropriate in general: some
   16597 // BUILD_VECTORs are substantially cheaper than others. The general case
   16598 // of a BUILD_VECTOR requires inserting each element individually (or
   16599 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
   16600 // all constants is a single constant pool load.  A BUILD_VECTOR where each
   16601 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
   16602 // are undef lowers to a small number of element insertions.
   16603 //
   16604 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
   16605 // We don't fold shuffles where one side is a non-zero constant, and we don't
   16606 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
   16607 // non-constant operands. This seems to work out reasonably well in practice.
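         // E.g. (illustrative):
         //   shuffle<0,3> (v2i32 build_vector a, b), (v2i32 build_vector c, d)
         //     -> (v2i32 build_vector a, d)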
   16608 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
   16609                                        SelectionDAG &DAG,
   16610                                        const TargetLowering &TLI) {
   16611   EVT VT = SVN->getValueType(0);
   16612   unsigned NumElts = VT.getVectorNumElements();
   16613   SDValue N0 = SVN->getOperand(0);
   16614   SDValue N1 = SVN->getOperand(1);
   16615 
   16616   if (!N0->hasOneUse() || !N1->hasOneUse())
   16617     return SDValue();
   16618 
   16619   // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
   16620   // discussed above.
   16621   if (!N1.isUndef()) {
   16622     bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
   16623     bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
   16624     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
   16625       return SDValue();
   16626     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
   16627       return SDValue();
   16628   }
   16629 
   16630   // If both inputs are splats of the same value then we can safely merge this
   16631   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
   16632   bool IsSplat = false;
   16633   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
   16634   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
   16635   if (BV0 && BV1)
   16636     if (SDValue Splat0 = BV0->getSplatValue())
   16637       IsSplat = (Splat0 == BV1->getSplatValue());
   16638 
   16639   SmallVector<SDValue, 8> Ops;
   16640   SmallSet<SDValue, 16> DuplicateOps;
   16641   for (int M : SVN->getMask()) {
   16642     SDValue Op = DAG.getUNDEF(VT.getScalarType());
   16643     if (M >= 0) {
   16644       int Idx = M < (int)NumElts ? M : M - NumElts;
   16645       SDValue &S = (M < (int)NumElts ? N0 : N1);
   16646       if (S.getOpcode() == ISD::BUILD_VECTOR) {
   16647         Op = S.getOperand(Idx);
   16648       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
   16649         assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
   16650         Op = S.getOperand(0);
   16651       } else {
   16652         // Operand can't be combined - bail out.
   16653         return SDValue();
   16654       }
   16655     }
   16656 
   16657     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
   16658     // generating a splat; semantically, this is fine, but it's likely to
   16659     // generate low-quality code if the target can't reconstruct an appropriate
   16660     // shuffle.
   16661     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
   16662       if (!IsSplat && !DuplicateOps.insert(Op).second)
   16663         return SDValue();
   16664 
   16665     Ops.push_back(Op);
   16666   }
   16667 
   16668   // BUILD_VECTOR requires all inputs to be of the same type; find the
   16669   // maximum type and extend them all.
   16670   EVT SVT = VT.getScalarType();
   16671   if (SVT.isInteger())
   16672     for (SDValue &Op : Ops)
   16673       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
   16674   if (SVT != VT.getScalarType())
   16675     for (SDValue &Op : Ops)
   16676       Op = TLI.isZExtFree(Op.getValueType(), SVT)
   16677                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
   16678                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
   16679   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
   16680 }
   16681 
   16682 // Match shuffles that can be converted to ANY_EXTEND_VECTOR_INREG.
   16683 // This is often generated during legalization.
   16684 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_extend_vector_inreg(v4i32 src))
   16685 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
   16686 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
   16687                                             SelectionDAG &DAG,
   16688                                             const TargetLowering &TLI,
   16689                                             bool LegalOperations,
   16690                                             bool LegalTypes) {
   16691   EVT VT = SVN->getValueType(0);
   16692   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
   16693 
   16694   // TODO Add support for big-endian when we have a test case.
   16695   if (!VT.isInteger() || IsBigEndian)
   16696     return SDValue();
   16697 
   16698   unsigned NumElts = VT.getVectorNumElements();
   16699   unsigned EltSizeInBits = VT.getScalarSizeInBits();
   16700   ArrayRef<int> Mask = SVN->getMask();
   16701   SDValue N0 = SVN->getOperand(0);
   16702 
   16703   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
   16704   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
   16705     for (unsigned i = 0; i != NumElts; ++i) {
   16706       if (Mask[i] < 0)
   16707         continue;
   16708       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
   16709         continue;
   16710       return false;
   16711     }
   16712     return true;
   16713   };
   16714 
   16715   // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
   16716   // power-of-2 extensions, as they are the most likely.
   16717   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
   16718     // Skip scales that don't evenly divide the vector size (non-power-of-2).
   16719     if (NumElts % Scale != 0)
   16720       continue;
   16721     if (!isAnyExtend(Scale))
   16722       continue;
   16723 
   16724     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
   16725     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
   16726     if (!LegalTypes || TLI.isTypeLegal(OutVT))
   16727       if (!LegalOperations ||
   16728           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
   16729         return DAG.getBitcast(VT,
   16730                             DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
   16731   }
   16732 
   16733   return SDValue();
   16734 }
   16735 
   16736 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
   16737 // each source element of a large type into the lowest elements of a smaller
   16738 // destination type. This is often generated during legalization.
   16739 // If the source node itself was a '*_extend_vector_inreg' node, we should
   16740 // be able to remove it.
   16741 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
   16742                                         SelectionDAG &DAG) {
   16743   EVT VT = SVN->getValueType(0);
   16744   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
   16745 
   16746   // TODO Add support for big-endian when we have a test case.
   16747   if (!VT.isInteger() || IsBigEndian)
   16748     return SDValue();
   16749 
   16750   SDValue N0 = peekThroughBitcast(SVN->getOperand(0));
   16751 
   16752   unsigned Opcode = N0.getOpcode();
   16753   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
   16754       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
   16755       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
   16756     return SDValue();
   16757 
   16758   SDValue N00 = N0.getOperand(0);
   16759   ArrayRef<int> Mask = SVN->getMask();
   16760   unsigned NumElts = VT.getVectorNumElements();
   16761   unsigned EltSizeInBits = VT.getScalarSizeInBits();
   16762   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
   16763   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
   16764 
   16765   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
   16766     return SDValue();
   16767   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
   16768 
   16769   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
   16770   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
   16771   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
   16772   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
   16773     for (unsigned i = 0; i != NumElts; ++i) {
   16774       if (Mask[i] < 0)
   16775         continue;
   16776       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
   16777         continue;
   16778       return false;
   16779     }
   16780     return true;
   16781   };
   16782 
   16783   // At the moment we just handle the case where we've truncated back to the
   16784   // same size as before the extension.
   16785   // TODO: handle more extension/truncation cases as cases arise.
   16786   if (EltSizeInBits != ExtSrcSizeInBits)
   16787     return SDValue();
   16788 
   16789   // We can remove *extend_vector_inreg only if the truncation happens at
   16790   // the same scale as the extension.
   16791   if (isTruncate(ExtScale))
   16792     return DAG.getBitcast(VT, N00);
   16793 
   16794   return SDValue();
   16795 }
   16796 
   16797 // Combine shuffles of splat-shuffles of the form:
   16798 // shuffle (shuffle V, undef, splat-mask), undef, M
   16799 // If splat-mask contains undef elements, we need to be careful about
   16800 // introducing undef's in the folded mask which are not the result of composing
   16801 // the masks of the shuffles.
   16802 static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
   16803                                      ShuffleVectorSDNode *Splat,
   16804                                      SelectionDAG &DAG) {
   16805   ArrayRef<int> SplatMask = Splat->getMask();
   16806   assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
   16807 
   16808   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
   16809   // every undef mask element in the splat-shuffle has a corresponding undef
   16810   // element in the user-shuffle's mask or if the composition of mask elements
   16811   // would result in undef.
   16812   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
   16813   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
   16814   //   In this case it is not legal to simplify to the splat-shuffle because we
   16815 //   may expose to the users of the shuffle an undef element at index 1
   16816   //   which was not there before the combine.
   16817   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
   16818   //   In this case the composition of masks yields SplatMask, so it's ok to
   16819   //   simplify to the splat-shuffle.
   16820   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
   16821   //   In this case the composed mask includes all undef elements of SplatMask
   16822   //   and in addition sets element zero to undef. It is safe to simplify to
   16823   //   the splat-shuffle.
   16824   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
   16825                                        ArrayRef<int> SplatMask) {
   16826     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
   16827       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
   16828           SplatMask[UserMask[i]] != -1)
   16829         return false;
   16830     return true;
   16831   };
   16832   if (CanSimplifyToExistingSplat(UserMask, SplatMask))
   16833     return SDValue(Splat, 0);
   16834 
   16835   // Create a new shuffle with a mask that is composed of the two shuffles'
   16836   // masks.
   16837   SmallVector<int, 32> NewMask;
   16838   for (int Idx : UserMask)
   16839     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
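         // E.g. composing UserMask=[0,2,u,u] with SplatMask=[2,u,2,u] (the first
         // example above) yields the folded mask [2,2,u,u].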
   16840 
   16841   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
   16842                               Splat->getOperand(0), Splat->getOperand(1),
   16843                               NewMask);
   16844 }
   16845 
   16846 /// If the shuffle mask is taking exactly one element from the first vector
   16847 /// operand and passing through all other elements from the second vector
   16848 /// operand, return the index of the mask element that is choosing an element
   16849 /// from the first operand. Otherwise, return -1.
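         /// E.g. with 4 elements, mask <4,5,2,7> returns 2 (only lane 2 reads from
         /// operand 0), while <4,1,2,7> returns -1 (two lanes read from operand 0).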
   16850 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
   16851   int MaskSize = Mask.size();
   16852   int EltFromOp0 = -1;
   16853   // TODO: This does not match if there are undef elements in the shuffle mask.
   16854   // Should we ignore undefs in the shuffle mask instead? The trade-off is
   16855   // removing an instruction (a shuffle), but losing the knowledge that some
   16856   // vector lanes are not needed.
   16857   for (int i = 0; i != MaskSize; ++i) {
   16858     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
   16859       // We're looking for a shuffle of exactly one element from operand 0.
   16860       if (EltFromOp0 != -1)
   16861         return -1;
   16862       EltFromOp0 = i;
   16863     } else if (Mask[i] != i + MaskSize) {
   16864       // Nothing from operand 1 can change lanes.
   16865       return -1;
   16866     }
   16867   }
   16868   return EltFromOp0;
   16869 }
   16870 
   16871 /// If a shuffle inserts exactly one element from a source vector operand into
   16872 /// another vector operand and we can access the specified element as a scalar,
   16873 /// then we can eliminate the shuffle.
   16874 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
   16875                                       SelectionDAG &DAG) {
   16876   // First, check if we are taking one element of a vector and shuffling that
   16877   // element into another vector.
   16878   ArrayRef<int> Mask = Shuf->getMask();
   16879   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
   16880   SDValue Op0 = Shuf->getOperand(0);
   16881   SDValue Op1 = Shuf->getOperand(1);
   16882   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
   16883   if (ShufOp0Index == -1) {
   16884     // Commute mask and check again.
   16885     ShuffleVectorSDNode::commuteMask(CommutedMask);
   16886     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
   16887     if (ShufOp0Index == -1)
   16888       return SDValue();
   16889     // Commute operands to match the commuted shuffle mask.
   16890     std::swap(Op0, Op1);
   16891     Mask = CommutedMask;
   16892   }
   16893 
   16894   // The shuffle inserts exactly one element from operand 0 into operand 1.
   16895   // Now see if we can access that element as a scalar via a real insert element
   16896   // instruction.
   16897   // TODO: We can try harder to locate the element as a scalar. Examples: it
   16898   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
   16899   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
   16900          "Shuffle mask value must be from operand 0");
   16901   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
   16902     return SDValue();
   16903 
   16904   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
   16905   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
   16906     return SDValue();
   16907 
   16908   // There's an existing insertelement with constant insertion index, so we
   16909   // don't need to check the legality/profitability of a replacement operation
   16910   // that differs at most in the constant value. The target should be able to
   16911   // lower any of those in a similar way. If not, legalization will expand this
   16912   // to a scalar-to-vector plus shuffle.
   16913   //
   16914   // Note that the shuffle may move the scalar from the position that the insert
   16915   // element used. Therefore, our new insert element occurs at the shuffle's
   16916   // mask index value, not the insert's index value.
   16917   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
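         // E.g. shuffle (insertelt v1, x, 0), v2, <4,5,0,7> --> insertelt v2, x, 2,
         // since the shuffle moves the scalar from lane 0 of v1 into lane 2.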
   16918   SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
   16919                                         Op0.getOperand(2).getValueType());
   16920   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
   16921                      Op1, Op0.getOperand(1), NewInsIndex);
   16922 }
   16923 
   16924 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   16925   EVT VT = N->getValueType(0);
   16926   unsigned NumElts = VT.getVectorNumElements();
   16927 
   16928   SDValue N0 = N->getOperand(0);
   16929   SDValue N1 = N->getOperand(1);
   16930 
   16931   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
   16932 
   16933   // Canonicalize shuffle undef, undef -> undef
   16934   if (N0.isUndef() && N1.isUndef())
   16935     return DAG.getUNDEF(VT);
   16936 
   16937   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
   16938 
   16939   // Canonicalize shuffle v, v -> v, undef
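         // E.g. shuffle X, X, <0,5,2,7> --> shuffle X, undef, <0,1,2,3>.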
   16940   if (N0 == N1) {
   16941     SmallVector<int, 8> NewMask;
   16942     for (unsigned i = 0; i != NumElts; ++i) {
   16943       int Idx = SVN->getMaskElt(i);
   16944       if (Idx >= (int)NumElts) Idx -= NumElts;
   16945       NewMask.push_back(Idx);
   16946     }
   16947     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
   16948   }
   16949 
   16950   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
   16951   if (N0.isUndef())
   16952     return DAG.getCommutedVectorShuffle(*SVN);
   16953 
   16954   // Remove references to rhs if it is undef
   16955   if (N1.isUndef()) {
   16956     bool Changed = false;
   16957     SmallVector<int, 8> NewMask;
   16958     for (unsigned i = 0; i != NumElts; ++i) {
   16959       int Idx = SVN->getMaskElt(i);
   16960       if (Idx >= (int)NumElts) {
   16961         Idx = -1;
   16962         Changed = true;
   16963       }
   16964       NewMask.push_back(Idx);
   16965     }
   16966     if (Changed)
   16967       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
   16968   }
   16969 
   16970   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
   16971     return InsElt;
   16972 
   16973   // A shuffle of a single vector that is a splat can always be folded.
   16974   if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
   16975     if (N1->isUndef() && N0Shuf->isSplat())
   16976       return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
   16977 
   16978   // If it is a splat, check if the argument vector is another splat or a
   16979   // build_vector.
   16980   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
   16981     SDNode *V = N0.getNode();
   16982 
   16983     // If this is a bit convert that changes the element type of the vector but
   16984     // not the number of vector elements, look through it.  Be careful not to
   16985     // look through conversions that change things like v4f32 to v2f64.
   16986     if (V->getOpcode() == ISD::BITCAST) {
   16987       SDValue ConvInput = V->getOperand(0);
   16988       if (ConvInput.getValueType().isVector() &&
   16989           ConvInput.getValueType().getVectorNumElements() == NumElts)
   16990         V = ConvInput.getNode();
   16991     }
   16992 
   16993     if (V->getOpcode() == ISD::BUILD_VECTOR) {
   16994       assert(V->getNumOperands() == NumElts &&
   16995              "BUILD_VECTOR has wrong number of operands");
   16996       SDValue Base;
   16997       bool AllSame = true;
   16998       for (unsigned i = 0; i != NumElts; ++i) {
   16999         if (!V->getOperand(i).isUndef()) {
   17000           Base = V->getOperand(i);
   17001           break;
   17002         }
   17003       }
   17004       // Splat of <u, u, u, u>, return <u, u, u, u>
   17005       if (!Base.getNode())
   17006         return N0;
   17007       for (unsigned i = 0; i != NumElts; ++i) {
   17008         if (V->getOperand(i) != Base) {
   17009           AllSame = false;
   17010           break;
   17011         }
   17012       }
   17013       // Splat of <x, x, x, x>, return <x, x, x, x>
   17014       if (AllSame)
   17015         return N0;
   17016 
   17017       // Canonicalize any other splat as a build_vector.
   17018       const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
   17019       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
   17020       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
   17021 
   17022       // We may have jumped through bitcasts, so the type of the
   17023       // BUILD_VECTOR may not match the type of the shuffle.
   17024       if (V->getValueType(0) != VT)
   17025         NewBV = DAG.getBitcast(VT, NewBV);
   17026       return NewBV;
   17027     }
   17028   }
   17029 
   17030   // Simplify source operands based on shuffle mask.
   17031   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
   17032     return SDValue(N, 0);
   17033 
   17034   // Match shuffles that can be converted to any_extend_vector_inreg.
   17035   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations,
                                                        LegalTypes))
   17036     return V;
   17037 
   17038   // Combine "truncate_vector_in_reg" style shuffles.
   17039   if (SDValue V = combineTruncationShuffle(SVN, DAG))
   17040     return V;
   17041 
   17042   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
   17043       Level < AfterLegalizeVectorOps &&
   17044       (N1.isUndef() ||
   17045       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
   17046        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
   17047     if (SDValue V = partitionShuffleOfConcats(N, DAG))
   17048       return V;
   17049   }
   17050 
   17051   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
   17052   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
   17053   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
   17054     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
   17055       return Res;
   17056 
   17057   // If this shuffle only has a single input that is a bitcasted shuffle,
   17058   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
   17059   // back to their original types.
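         // E.g. (illustrative) a v4i32 shuffle of (v4i32 bitcast (v8i16 shuffle
         // X, Y, M)) can be rescaled so that both masks index v8i16 elements and
         // then merged into a single v8i16 shuffle of X and Y.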
   17060   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
   17061       N1.isUndef() && Level < AfterLegalizeVectorOps &&
   17062       TLI.isTypeLegal(VT)) {
   17063 
   17064     // Peek through the bitcast only if there is one user.
   17065     SDValue BC0 = N0;
   17066     while (BC0.getOpcode() == ISD::BITCAST) {
   17067       if (!BC0.hasOneUse())
   17068         break;
   17069       BC0 = BC0.getOperand(0);
   17070     }
   17071 
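             // Scale a mask by duplicating each of its entries; e.g. scaling the
             // mask <1,-1> by 2 yields <2,3,-1,-1>.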
   17072     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
   17073       if (Scale == 1)
   17074         return SmallVector<int, 8>(Mask.begin(), Mask.end());
   17075 
   17076       SmallVector<int, 8> NewMask;
   17077       for (int M : Mask)
   17078         for (int s = 0; s != Scale; ++s)
   17079           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
   17080       return NewMask;
   17081     };
   17082 
   17083     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
   17084       EVT SVT = VT.getScalarType();
   17085       EVT InnerVT = BC0->getValueType(0);
   17086       EVT InnerSVT = InnerVT.getScalarType();
   17087 
   17088       // Determine which shuffle works with the smaller scalar type.
   17089       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
   17090       EVT ScaleSVT = ScaleVT.getScalarType();
   17091 
   17092       if (TLI.isTypeLegal(ScaleVT) &&
   17093           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
   17094           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
   17095         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
   17096         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
   17097 
   17098         // Scale the shuffle masks to the smaller scalar type.
   17099         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
   17100         SmallVector<int, 8> InnerMask =
   17101             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
   17102         SmallVector<int, 8> OuterMask =
   17103             ScaleShuffleMask(SVN->getMask(), OuterScale);
   17104 
   17105         // Merge the shuffle masks.
   17106         SmallVector<int, 8> NewMask;
   17107         for (int M : OuterMask)
   17108           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
   17109 
   17110         // Test for shuffle mask legality over both commutations.
   17111         SDValue SV0 = BC0->getOperand(0);
   17112         SDValue SV1 = BC0->getOperand(1);
   17113         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
   17114         if (!LegalMask) {
   17115           std::swap(SV0, SV1);
   17116           ShuffleVectorSDNode::commuteMask(NewMask);
   17117           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
   17118         }
   17119 
   17120         if (LegalMask) {
   17121           SV0 = DAG.getBitcast(ScaleVT, SV0);
   17122           SV1 = DAG.getBitcast(ScaleVT, SV1);
   17123           return DAG.getBitcast(
   17124               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
   17125         }
   17126       }
   17127     }
   17128   }
   17129 
   17130   // Canonicalize shuffles according to rules:
   17131   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
   17132   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
   17133   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
   17134   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
   17135       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
   17136       TLI.isTypeLegal(VT)) {
   17137     // The incoming shuffle must be of the same type as the result of the
   17138     // current shuffle.
   17139     assert(N1->getOperand(0).getValueType() == VT &&
   17140            "Shuffle types don't match");
   17141 
   17142     SDValue SV0 = N1->getOperand(0);
   17143     SDValue SV1 = N1->getOperand(1);
   17144     bool HasSameOp0 = N0 == SV0;
   17145     bool IsSV1Undef = SV1.isUndef();
   17146     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
   17147       // Commute the operands of this shuffle so that next rule
   17148       // will trigger.
   17149       return DAG.getCommutedVectorShuffle(*SVN);
   17150   }
   17151 
   17152   // Try to fold according to rules:
   17153   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
   17154   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
   17155   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
   17156   // Don't try to fold shuffles with illegal type.
   17157   // Only fold if this shuffle is the only user of the other shuffle.
   17158   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
   17159       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
   17160     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
   17161 
   17162     // Don't try to fold splats; they're likely to simplify somehow, or they
   17163     // might be free.
   17164     if (OtherSV->isSplat())
   17165       return SDValue();
   17166 
   17167     // The incoming shuffle must be of the same type as the result of the
   17168     // current shuffle.
   17169     assert(OtherSV->getOperand(0).getValueType() == VT &&
   17170            "Shuffle types don't match");
   17171 
   17172     SDValue SV0, SV1;
   17173     SmallVector<int, 4> Mask;
   17174     // Compute the combined shuffle mask for a shuffle with SV0 as the first
   17175     // operand, and SV1 as the second operand.
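             // E.g. with 4 elements,
             //   shuffle (shuffle A, B, <0,4,1,5>), C, <0,2,4,6>
             // resolves outer indices 0 and 2 through the inner mask to A[0] and
             // A[1], and takes outer indices 4 and 6 from C, giving
             //   shuffle A, C, <0,1,4,6>.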
   17176     for (unsigned i = 0; i != NumElts; ++i) {
   17177       int Idx = SVN->getMaskElt(i);
   17178       if (Idx < 0) {
   17179         // Propagate Undef.
   17180         Mask.push_back(Idx);
   17181         continue;
   17182       }
   17183 
   17184       SDValue CurrentVec;
   17185       if (Idx < (int)NumElts) {
   17186         // This shuffle index refers to the inner shuffle N0. Lookup the inner
   17187         // shuffle mask to identify which vector is actually referenced.
   17188         Idx = OtherSV->getMaskElt(Idx);
   17189         if (Idx < 0) {
   17190           // Propagate Undef.
   17191           Mask.push_back(Idx);
   17192           continue;
   17193         }
   17194 
   17195         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
   17196                                            : OtherSV->getOperand(1);
   17197       } else {
   17198         // This shuffle index references an element within N1.
   17199         CurrentVec = N1;
   17200       }
   17201 
   17202       // Simple case where 'CurrentVec' is UNDEF.
   17203       if (CurrentVec.isUndef()) {
   17204         Mask.push_back(-1);
   17205         continue;
   17206       }
   17207 
   17208       // Canonicalize the shuffle index. We don't know yet if CurrentVec
   17209       // will be the first or second operand of the combined shuffle.
   17210       Idx = Idx % NumElts;
   17211       if (!SV0.getNode() || SV0 == CurrentVec) {
   17212         // Ok. CurrentVec is the left hand side.
   17213         // Update the mask accordingly.
   17214         SV0 = CurrentVec;
   17215         Mask.push_back(Idx);
   17216         continue;
   17217       }
   17218 
   17219       // Bail out if we cannot convert the shuffle pair into a single shuffle.
   17220       if (SV1.getNode() && SV1 != CurrentVec)
   17221         return SDValue();
   17222 
   17223       // Ok. CurrentVec is the right hand side.
   17224       // Update the mask accordingly.
   17225       SV1 = CurrentVec;
   17226       Mask.push_back(Idx + NumElts);
   17227     }
   17228 
   17229     // Check if all indices in Mask are Undef. If so, propagate Undef.
   17230     bool isUndefMask = true;
   17231     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
   17232       isUndefMask &= Mask[i] < 0;
   17233 
   17234     if (isUndefMask)
   17235       return DAG.getUNDEF(VT);
   17236 
   17237     if (!SV0.getNode())
   17238       SV0 = DAG.getUNDEF(VT);
   17239     if (!SV1.getNode())
   17240       SV1 = DAG.getUNDEF(VT);
   17241 
   17242     // Avoid introducing shuffles with illegal mask.
   17243     if (!TLI.isShuffleMaskLegal(Mask, VT)) {
   17244       ShuffleVectorSDNode::commuteMask(Mask);
   17245 
   17246       if (!TLI.isShuffleMaskLegal(Mask, VT))
   17247         return SDValue();
   17248 
   17249       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
   17250       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
   17251       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
   17252       std::swap(SV0, SV1);
   17253     }
   17254 
   17255     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
   17256     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
   17257     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
   17258     return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
   17259   }
   17260 
   17261   return SDValue();
   17262 }
   17263 
   17264 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
   17265   SDValue InVal = N->getOperand(0);
   17266   EVT VT = N->getValueType(0);
   17267 
   17268   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
   17269   // with a VECTOR_SHUFFLE and possible truncate.
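         // E.g. (v4i32 scalar_to_vector (extract_vector_elt (v4i32 V), 2))
         //        --> (vector_shuffle V, undef, <2,-1,-1,-1>)
         // when that mask is legal for the target.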
   17270   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
   17271     SDValue InVec = InVal->getOperand(0);
   17272     SDValue EltNo = InVal->getOperand(1);
   17273     auto InVecT = InVec.getValueType();
   17274     if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
   17275       SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
   17276       int Elt = C0->getZExtValue();
   17277       NewMask[0] = Elt;
   17278       SDValue Val;
   17279       // If we have an implicit truncate, do the truncate here as long as it's
   17280       // legal; if it's not, fall through to the shuffle-based lowering below.
   17281       if (VT.getScalarType() != InVal.getValueType() &&
   17282           InVal.getValueType().isScalarInteger() &&
   17283           isTypeLegal(VT.getScalarType())) {
   17284         Val =
   17285             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
   17286         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
   17287       }
   17288       if (VT.getScalarType() == InVecT.getScalarType() &&
   17289           VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
   17290           TLI.isShuffleMaskLegal(NewMask, VT)) {
   17291         Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
   17292                                    DAG.getUNDEF(InVecT), NewMask);
   17293         // If the initial vector is the correct size this shuffle is a
   17294         // valid result.
   17295         if (VT == InVecT)
   17296           return Val;
   17297         // If not, we must truncate the vector.
   17298         if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
   17299           MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
   17300           SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
   17301           EVT SubVT =
   17302               EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
   17303                                VT.getVectorNumElements());
   17304           Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
   17305                             ZeroIdx);
   17306           return Val;
   17307         }
   17308       }
   17309     }
   17310   }
   17311 
   17312   return SDValue();
   17313 }
   17314 
   17315 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
   17316   EVT VT = N->getValueType(0);
   17317   SDValue N0 = N->getOperand(0);
   17318   SDValue N1 = N->getOperand(1);
   17319   SDValue N2 = N->getOperand(2);
   17320 
   17321   // If inserting an UNDEF, just return the original vector.
   17322   if (N1.isUndef())
   17323     return N0;
   17324 
   17325   // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
   17326   // us to pull BITCASTs from input to output.
   17327   if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
   17328     if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
   17329       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);
   17330 
   17331   // If this is an insert of an extracted vector into an undef vector, we can
   17332   // just use the input to the extract.
   17333   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
   17334       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
   17335     return N1.getOperand(0);
   17336 
   17337   // If we are inserting a bitcast value into an undef, with the same
   17338   // number of elements, just use the bitcast input of the extract.
   17339   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
   17340   //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
   17341   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
   17342       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
   17343       N1.getOperand(0).getOperand(1) == N2 &&
   17344       N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
   17345           VT.getVectorNumElements() &&
   17346       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
   17347           VT.getSizeInBits()) {
   17348     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
   17349   }
   17350 
   17351   // If both N0 and N1 are bitcast values on which insert_subvector
   17352   // would make sense, pull the bitcast through.
   17353   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
   17354   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
   17355   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
   17356     SDValue CN0 = N0.getOperand(0);
   17357     SDValue CN1 = N1.getOperand(0);
   17358     EVT CN0VT = CN0.getValueType();
   17359     EVT CN1VT = CN1.getValueType();
   17360     if (CN0VT.isVector() && CN1VT.isVector() &&
   17361         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
   17362         CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
   17363       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
   17364                                       CN0.getValueType(), CN0, CN1, N2);
   17365       return DAG.getBitcast(VT, NewINSERT);
   17366     }
   17367   }
   17368 
   17369   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
   17370   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
   17371   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
   17372   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
   17373       N0.getOperand(1).getValueType() == N1.getValueType() &&
   17374       N0.getOperand(2) == N2)
   17375     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
   17376                        N1, N2);
   17377 
   17378   if (!isa<ConstantSDNode>(N2))
   17379     return SDValue();
   17380 
   17381   unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
   17382 
   17383   // Canonicalize insert_subvector dag nodes.
   17384   // Example:
   17385   // (insert_subvector (insert_subvector A, Idx0), Idx1)
   17386   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
   17387   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
   17388       N1.getValueType() == N0.getOperand(1).getValueType() &&
   17389       isa<ConstantSDNode>(N0.getOperand(2))) {
   17390     unsigned OtherIdx = N0.getConstantOperandVal(2);
   17391     if (InsIdx < OtherIdx) {
   17392       // Swap nodes.
   17393       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
   17394                                   N0.getOperand(0), N1, N2);
   17395       AddToWorklist(NewOp.getNode());
   17396       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
   17397                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
   17398     }
   17399   }
   17400 
   17401   // If the input vector is a concatenation, and the insert replaces
   17402   // one of the pieces, we can optimize into a single concat_vectors.
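         // E.g. with v4i32 subvectors A, B and C:
         //   insert_subvector (concat_vectors A, B), C, 4 --> concat_vectors A, C.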
   17403   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
   17404       N0.getOperand(0).getValueType() == N1.getValueType()) {
   17405     unsigned Factor = N1.getValueType().getVectorNumElements();
   17406 
   17407     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
   17408     Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
   17409 
   17410     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
   17411   }
   17412 
   17413   return SDValue();
   17414 }
   17415 
   17416 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
   17417   SDValue N0 = N->getOperand(0);
   17418 
   17419   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
   17420   if (N0->getOpcode() == ISD::FP16_TO_FP)
   17421     return N0->getOperand(0);
   17422 
   17423   return SDValue();
   17424 }
   17425 
   17426 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
   17427   SDValue N0 = N->getOperand(0);
   17428 
   17429   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
   17430   if (N0->getOpcode() == ISD::AND) {
   17431     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
   17432     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
   17433       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
   17434                          N0.getOperand(0));
   17435     }
   17436   }
   17437 
   17438   return SDValue();
   17439 }
   17440 
   17441 /// Returns a vector_shuffle if it is able to transform an AND to a
   17442 /// vector_shuffle with the destination vector and a zero vector.
   17443 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0> ==>
   17444 ///      vector_shuffle V, Zero, <0, 5, 2, 7>
   17445 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
   17446   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
   17447 
   17448   EVT VT = N->getValueType(0);
   17449   SDValue LHS = N->getOperand(0);
   17450   SDValue RHS = peekThroughBitcast(N->getOperand(1));
   17451   SDLoc DL(N);
   17452 
   17453   // Make sure we're not running after operation legalization where it
   17454   // may have custom lowered the vector shuffles.
   17455   if (LegalOperations)
   17456     return SDValue();
   17457 
   17458   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
   17459     return SDValue();
   17460 
   17461   EVT RVT = RHS.getValueType();
   17462   unsigned NumElts = RHS.getNumOperands();
   17463 
   17464   // Attempt to create a valid clear mask by splitting the mask into
   17465   // sub-elements and checking that each is either all zeros or all ones,
   17466   // making it suitable for shuffle masking.
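         // E.g. (little-endian, illustrative) a v2i64 mask constant
         // <0x00000000ffffffff, 0xffffffff00000000> fails at Split == 1, but at
         // Split == 2 its v4i32 sub-elements are <-1, 0, 0, -1>, giving the
         // clear-mask indices <0,5,6,3>.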
   17467   auto BuildClearMask = [&](int Split) {
   17468     int NumSubElts = NumElts * Split;
   17469     int NumSubBits = RVT.getScalarSizeInBits() / Split;
   17470 
   17471     SmallVector<int, 8> Indices;
   17472     for (int i = 0; i != NumSubElts; ++i) {
   17473       int EltIdx = i / Split;
   17474       int SubIdx = i % Split;
   17475       SDValue Elt = RHS.getOperand(EltIdx);
   17476       if (Elt.isUndef()) {
   17477         Indices.push_back(-1);
   17478         continue;
   17479       }
   17480 
   17481       APInt Bits;
   17482       if (isa<ConstantSDNode>(Elt))
   17483         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
   17484       else if (isa<ConstantFPSDNode>(Elt))
   17485         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
   17486       else
   17487         return SDValue();
   17488 
   17489       // Extract the sub element from the constant bit mask.
   17490       if (DAG.getDataLayout().isBigEndian()) {
   17491         Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
   17492       } else {
   17493         Bits.lshrInPlace(SubIdx * NumSubBits);
   17494       }
   17495 
   17496       if (Split > 1)
   17497         Bits = Bits.trunc(NumSubBits);
   17498 
   17499       if (Bits.isAllOnesValue())
   17500         Indices.push_back(i);
   17501       else if (Bits == 0)
   17502         Indices.push_back(i + NumSubElts);
   17503       else
   17504         return SDValue();
   17505     }
   17506 
   17507     // Let's see if the target supports this vector_shuffle.
   17508     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
   17509     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
   17510     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
   17511       return SDValue();
   17512 
   17513     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
   17514     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
   17515                                                    DAG.getBitcast(ClearVT, LHS),
   17516                                                    Zero, Indices));
   17517   };
   17518 
   17519   // Determine maximum split level (byte level masking).
   17520   int MaxSplit = 1;
   17521   if (RVT.getScalarSizeInBits() % 8 == 0)
   17522     MaxSplit = RVT.getScalarSizeInBits() / 8;
   17523 
   17524   for (int Split = 1; Split <= MaxSplit; ++Split)
   17525     if (RVT.getScalarSizeInBits() % Split == 0)
   17526       if (SDValue S = BuildClearMask(Split))
   17527         return S;
   17528 
   17529   return SDValue();
   17530 }
   17531 
   17532 /// Visit a binary vector operation, like ADD.
   17533 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
   17534   assert(N->getValueType(0).isVector() &&
   17535          "SimplifyVBinOp only works on vectors!");
   17536 
   17537   SDValue LHS = N->getOperand(0);
   17538   SDValue RHS = N->getOperand(1);
   17539   SDValue Ops[] = {LHS, RHS};
   17540 
   17541   // See if we can constant fold the vector operation.
   17542   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
   17543           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
   17544     return Fold;
   17545 
   17546   // Type legalization might introduce new shuffles in the DAG.
   17547   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
   17548   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
   17549   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
   17550       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
   17551       LHS.getOperand(1).isUndef() &&
   17552       RHS.getOperand(1).isUndef()) {
   17553     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
   17554     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
   17555 
   17556     if (SVN0->getMask().equals(SVN1->getMask())) {
   17557       EVT VT = N->getValueType(0);
   17558       SDValue UndefVector = LHS.getOperand(1);
   17559       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
   17560                                      LHS.getOperand(0), RHS.getOperand(0),
   17561                                      N->getFlags());
   17562       AddUsersToWorklist(N);
   17563       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
   17564                                   SVN0->getMask());
   17565     }
   17566   }
   17567 
   17568   return SDValue();
   17569 }
   17570 
   17571 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
   17572                                     SDValue N2) {
   17573   assert(N0.getOpcode() == ISD::SETCC &&
                  "First argument must be a SetCC node!");
   17574 
   17575   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
   17576                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
   17577 
   17578   // If we got a simplified select_cc node back from SimplifySelectCC, then
   17579   // break it down into a new SETCC node, and a new SELECT node, and then return
   17580   // the SELECT node, since we were called with a SELECT node.
   17581   if (SCC.getNode()) {
   17582     // Check to see if we got a select_cc back (to turn into setcc/select).
   17583     // Otherwise, just return whatever node we got back, like fabs.
   17584     if (SCC.getOpcode() == ISD::SELECT_CC) {
   17585       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
   17586                                   N0.getValueType(),
   17587                                   SCC.getOperand(0), SCC.getOperand(1),
   17588                                   SCC.getOperand(4));
   17589       AddToWorklist(SETCC.getNode());
   17590       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
   17591                            SCC.getOperand(2), SCC.getOperand(3));
   17592     }
   17593 
   17594     return SCC;
   17595   }
   17596   return SDValue();
   17597 }
   17598 
   17599 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
   17600 /// being selected between, see if we can simplify the select.  Callers of this
   17601 /// should assume that TheSelect is deleted if this returns true.  As such, they
   17602 /// should return the appropriate thing (e.g. the node) back to the top-level of
   17603 /// the DAG combiner loop to avoid it being looked at.
   17604 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
   17605                                     SDValue RHS) {
   17606   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
   17607   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
   17608   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
   17609     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
   17610       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
   17611       SDValue Sqrt = RHS;
   17612       ISD::CondCode CC;
   17613       SDValue CmpLHS;
   17614       const ConstantFPSDNode *Zero = nullptr;
   17615 
   17616       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
   17617         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
   17618         CmpLHS = TheSelect->getOperand(0);
   17619         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
   17620       } else {
   17621         // SELECT or VSELECT
   17622         SDValue Cmp = TheSelect->getOperand(0);
   17623         if (Cmp.getOpcode() == ISD::SETCC) {
   17624           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
   17625           CmpLHS = Cmp.getOperand(0);
   17626           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
   17627         }
   17628       }
   17629       if (Zero && Zero->isZero() &&
   17630           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
   17631           CC == ISD::SETULT || CC == ISD::SETLT)) {
   17632         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
   17633         CombineTo(TheSelect, Sqrt);
   17634         return true;
   17635       }
   17636     }
   17637   }
   17638   // Cannot simplify select with vector condition
   17639   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
   17640 
   17641   // If this is a select from two identical things, try to pull the operation
   17642   // through the select.
   17643   if (LHS.getOpcode() != RHS.getOpcode() ||
   17644       !LHS.hasOneUse() || !RHS.hasOneUse())
   17645     return false;
   17646 
   17647   // If this is a load and the token chain is identical, replace the select
   17648   // of two loads with a load through a select of the address to load from.
   17649   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
   17650   // constants have been dropped into the constant pool.
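         // I.e. select C, (load A), (load B) --> load (select C, A, B), provided
         // the loads are compatible, the condition does not reach either load,
         // and the loads do not depend on one another.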
   17651   if (LHS.getOpcode() == ISD::LOAD) {
   17652     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
   17653     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
   17654 
   17655     // Token chains must be identical.
   17656     if (LHS.getOperand(0) != RHS.getOperand(0) ||
   17657         // Do not let this transformation reduce the number of volatile loads.
   17658         LLD->isVolatile() || RLD->isVolatile() ||
   17659         // FIXME: If either is a pre/post inc/dec load,
   17660         // we'd need to split out the address adjustment.
   17661         LLD->isIndexed() || RLD->isIndexed() ||
   17662         // If this is an EXTLOAD, the VT's must match.
   17663         LLD->getMemoryVT() != RLD->getMemoryVT() ||
   17664         // If this is an EXTLOAD, the kind of extension must match.
   17665         (LLD->getExtensionType() != RLD->getExtensionType() &&
   17666          // The only exception is if one of the extensions is anyext.
   17667          LLD->getExtensionType() != ISD::EXTLOAD &&
   17668          RLD->getExtensionType() != ISD::EXTLOAD) ||
   17669         // FIXME: this discards src value information.  This is
   17670         // over-conservative. It would be beneficial to be able to remember
   17671         // both potential memory locations.  Since we are discarding
   17672         // src value info, don't do the transformation if the memory
   17673         // locations are not in the default address space.
   17674         LLD->getPointerInfo().getAddrSpace() != 0 ||
   17675         RLD->getPointerInfo().getAddrSpace() != 0 ||
   17676         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
   17677                                       LLD->getBasePtr().getValueType()))
   17678       return false;
   17679 
   17680     // Check that the select condition doesn't reach either load.  If so,
   17681     // folding this will induce a cycle into the DAG.  If not, this is safe to
   17682     // xform, so create a select of the addresses.
   17683     SDValue Addr;
   17684     if (TheSelect->getOpcode() == ISD::SELECT) {
   17685       SDNode *CondNode = TheSelect->getOperand(0).getNode();
   17686       if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
   17687           (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
   17688         return false;
   17689       // The loads must not depend on one another.
   17690       if (LLD->isPredecessorOf(RLD) ||
   17691           RLD->isPredecessorOf(LLD))
   17692         return false;
   17693       Addr = DAG.getSelect(SDLoc(TheSelect),
   17694                            LLD->getBasePtr().getValueType(),
   17695                            TheSelect->getOperand(0), LLD->getBasePtr(),
   17696                            RLD->getBasePtr());
   17697     } else {  // Otherwise SELECT_CC
   17698       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
   17699       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
   17700 
   17701       if ((LLD->hasAnyUseOfValue(1) &&
   17702            (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
   17703           (RLD->hasAnyUseOfValue(1) &&
   17704            (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
   17705         return false;
   17706 
   17707       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
   17708                          LLD->getBasePtr().getValueType(),
   17709                          TheSelect->getOperand(0),
   17710                          TheSelect->getOperand(1),
   17711                          LLD->getBasePtr(), RLD->getBasePtr(),
   17712                          TheSelect->getOperand(4));
   17713     }
   17714 
   17715     SDValue Load;
   17716     // It is safe to replace the two loads even if they have different
   17717     // alignments, but the new load must use the minimum (most restrictive)
   17718     // alignment of the two.
   17719     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
   17720     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
   17721     if (!RLD->isInvariant())
   17722       MMOFlags &= ~MachineMemOperand::MOInvariant;
   17723     if (!RLD->isDereferenceable())
   17724       MMOFlags &= ~MachineMemOperand::MODereferenceable;
   17725     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
   17726       // FIXME: Discards pointer and AA info.
   17727       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
   17728                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
   17729                          MMOFlags);
   17730     } else {
   17731       // FIXME: Discards pointer and AA info.
   17732       Load = DAG.getExtLoad(
   17733           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
   17734                                                   : LLD->getExtensionType(),
   17735           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
   17736           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
   17737     }
   17738 
   17739     // Users of the select now use the result of the load.
   17740     CombineTo(TheSelect, Load);
   17741 
   17742     // Users of the old loads now use the new load's chain.  We know the
   17743     // old-load value is dead now.
   17744     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
   17745     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
   17746     return true;
   17747   }
   17748 
   17749   return false;
   17750 }
   17751 
   17752 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
   17753 /// bitwise 'and'.
   17754 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
   17755                                             SDValue N1, SDValue N2, SDValue N3,
   17756                                             ISD::CondCode CC) {
   17757   // If this is a select where the false operand is zero and the compare is a
   17758   // check of the sign bit, see if we can perform the "gzip trick":
   17759   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
   17760   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
   17761   EVT XType = N0.getValueType();
   17762   EVT AType = N2.getValueType();
   17763   if (!isNullConstant(N3) || !XType.bitsGE(AType))
   17764     return SDValue();
   17765 
   17766   // If the comparison is testing for a positive value, we have to invert
   17767   // the sign bit mask, so only do that transform if the target has a bitwise
   17768   // 'and not' instruction (the invert is free).
   17769   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
   17770     // (X > -1) ? A : 0
   17771     // (X >  0) ? X : 0 <-- This is canonical signed max.
   17772     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
   17773       return SDValue();
   17774   } else if (CC == ISD::SETLT) {
   17775     // (X <  0) ? A : 0
   17776     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
   17777     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
   17778       return SDValue();
   17779   } else {
   17780     return SDValue();
   17781   }
   17782 
   17783   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
   17784   // constant.
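         // E.g. for i32: select_cc setlt X, 0, 64, 0 --> (and (srl X, 25), 64),
         // shifting the sign bit (bit 31) down onto bit 6 (64 == 1 << 6).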
   17785   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
   17786   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
   17787   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
   17788     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
   17789     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
   17790     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
   17791     AddToWorklist(Shift.getNode());
   17792 
   17793     if (XType.bitsGT(AType)) {
   17794       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
   17795       AddToWorklist(Shift.getNode());
   17796     }
   17797 
   17798     if (CC == ISD::SETGT)
   17799       Shift = DAG.getNOT(DL, Shift, AType);
   17800 
   17801     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
   17802   }
   17803 
   17804   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
   17805   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
   17806   AddToWorklist(Shift.getNode());
   17807 
   17808   if (XType.bitsGT(AType)) {
   17809     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
   17810     AddToWorklist(Shift.getNode());
   17811   }
   17812 
   17813   if (CC == ISD::SETGT)
   17814     Shift = DAG.getNOT(DL, Shift, AType);
   17815 
   17816   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
   17817 }
   17818 
   17819 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
   17820 /// where 'cond' is the comparison specified by CC.
   17821 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
   17822                                       SDValue N2, SDValue N3, ISD::CondCode CC,
   17823                                       bool NotExtCompare) {
   17824   // (x ? y : y) -> y.
   17825   if (N2 == N3) return N2;
   17826 
   17827   EVT VT = N2.getValueType();
   17828   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
   17829   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
   17830 
   17831   // Determine if the condition we're dealing with is constant
   17832   SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
   17833                               N0, N1, CC, DL, false);
   17834   if (SCC.getNode()) AddToWorklist(SCC.getNode());
   17835 
   17836   if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
   17837     // fold select_cc true, x, y -> x
   17838     // fold select_cc false, x, y -> y
   17839     return !SCCC->isNullValue() ? N2 : N3;
   17840   }
   17841 
   17842   // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
   17843   // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
   17844   // in it.  This is a win when the constant is not otherwise available because
   17845   // it replaces two constant pool loads with one.  We only do this if the FP
   17846   // type is known to be legal, because if it isn't, then we are before legalize
   17847   // types and we want the other legalization to happen first (e.g. to avoid
   17848   // messing with soft float) and if the ConstantFP is not legal, because if
   17849   // it is legal, we may not need to store the FP constant in a constant pool.
   17850   if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
   17851     if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
   17852       if (TLI.isTypeLegal(N2.getValueType()) &&
   17853           (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
   17854                TargetLowering::Legal &&
   17855            !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
   17856            !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
   17857           // If both constants have multiple uses, then we won't need to do an
   17858           // extra load, they are likely around in registers for other users.
   17859           (TV->hasOneUse() || FV->hasOneUse())) {
   17860         Constant *Elts[] = {
   17861           const_cast<ConstantFP*>(FV->getConstantFPValue()),
   17862           const_cast<ConstantFP*>(TV->getConstantFPValue())
   17863         };
   17864         Type *FPTy = Elts[0]->getType();
   17865         const DataLayout &TD = DAG.getDataLayout();
   17866 
   17867         // Create a ConstantArray of the two constants.
   17868         Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
   17869         SDValue CPIdx =
   17870             DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
   17871                                 TD.getPrefTypeAlignment(FPTy));
   17872         unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
   17873 
   17874         // Get the offsets to the 0 and 1 element of the array so that we can
   17875         // select between them.
   17876         SDValue Zero = DAG.getIntPtrConstant(0, DL);
   17877         unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
   17878         SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
   17879 
   17880         SDValue Cond = DAG.getSetCC(DL,
   17881                                     getSetCCResultType(N0.getValueType()),
   17882                                     N0, N1, CC);
   17883         AddToWorklist(Cond.getNode());
   17884         SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
   17885                                           Cond, One, Zero);
   17886         AddToWorklist(CstOffset.getNode());
   17887         CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
   17888                             CstOffset);
   17889         AddToWorklist(CPIdx.getNode());
   17890         return DAG.getLoad(
   17891             TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
   17892             MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
   17893             Alignment);
   17894       }
   17895     }
   17896 
   17897   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
   17898     return V;
   17899 
   17900   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
   17901   // where y has a single bit set.
   17902   // A plaintext description would be: we can turn the SELECT_CC into an AND
   17903   // when the condition can be materialized as an all-ones register.  Any
   17904   // single bit-test can be materialized as an all-ones register with
   17905   // shift-left and shift-right-arith.
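         // E.g. for i32: select_cc seteq (and X, 8), 0, 0, A first moves bit 3 up
         // to the sign bit (shl X, 28) and then smears it across the register
         // (sra 31), producing all-ones exactly when bit 3 of X is set; the final
         // AND with A then selects A or zero.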
   17906   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
   17907       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
   17908     SDValue AndLHS = N0->getOperand(0);
   17909     ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
   17910     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
   17911       // Shift the tested bit over the sign bit.
   17912       const APInt &AndMask = ConstAndRHS->getAPIntValue();
   17913       SDValue ShlAmt =
   17914         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
   17915                         getShiftAmountTy(AndLHS.getValueType()));
   17916       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
   17917 
   17918       // Now arithmetic right shift it all the way over, so the result is either
   17919       // all-ones, or zero.
   17920       SDValue ShrAmt =
   17921         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
   17922                         getShiftAmountTy(Shl.getValueType()));
   17923       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
   17924 
   17925       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
   17926     }
   17927   }
   17928 

  // fold select C, 16, 0 -> shl C, 4
  if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
      TLI.getBooleanContents(N0.getValueType()) ==
          TargetLowering::ZeroOrOneBooleanContent) {

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->isOne())
      return SDValue();

    // Get a SetCC of the condition.
    // NOTE: Don't create a SETCC if it's not legal on this target.
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
      SDValue Temp, SCC;
      // Cast from the setcc result type to the select result type.
      if (LegalTypes) {
        SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
                            N0, N1, CC);
        if (N2.getValueType().bitsLT(SCC.getValueType()))
          Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
                                        N2.getValueType());
        else
          Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                             N2.getValueType(), SCC);
      } else {
        SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                           N2.getValueType(), SCC);
      }

      AddToWorklist(SCC.getNode());
      AddToWorklist(Temp.getNode());

      if (N2C->isOne())
        return Temp;

      // shl setcc result by log2 n2c
      return DAG.getNode(
          ISD::SHL, DL, N2.getValueType(), Temp,
          DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
                          getShiftAmountTy(Temp.getValueType())));
    }
  }

  // Check to see if this is an integer abs.
  // select_cc setg[te] X,  0,  X, -X ->
  // select_cc setgt    X, -1,  X, -X ->
  // select_cc setl[te] X,  0, -X,  X ->
  // select_cc setlt    X,  1, -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
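  // A worked example of the emitted sequence (illustrative): with i32
  // X = -5, Y = sra(X, 31) = -1 (all ones), so add(X, Y) = -6 and
  // xor(-6, -1) = ~(-6) = 5 = abs(-5). For non-negative X, Y = 0 and both
  // the add and the xor are no-ops.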
  if (N1C) {
    ConstantSDNode *SubC = nullptr;
    if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
        N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
    else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
              (N1C->isOne() && CC == ISD::SETLT)) &&
             N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));

    EVT XType = N0.getValueType();
    if (SubC && SubC->isNullValue() && XType.isInteger()) {
      SDLoc DL(N0);
      SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
                                  N0,
                                  DAG.getConstant(XType.getSizeInBits() - 1, DL,
                                         getShiftAmountTy(N0.getValueType())));
      SDValue Add = DAG.getNode(ISD::ADD, DL,
                                XType, N0, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
    }
  }

  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
  // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
  // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
  if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    SDValue ValueOnZero = N2;
    SDValue Count = N3;
    // If the condition is NE instead of EQ, swap the operands.
    if (CC == ISD::SETNE)
      std::swap(ValueOnZero, Count);
    // Check if the value on zero is a constant equal to the bits in the type.
    if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
      if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
        // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
        // legal, combine to just cttz.
        if ((Count.getOpcode() == ISD::CTTZ ||
             Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
          return DAG.getNode(ISD::CTTZ, DL, VT, N0);
        // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
        // legal, combine to just ctlz.
        if ((Count.getOpcode() == ISD::CTLZ ||
             Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
          return DAG.getNode(ISD::CTLZ, DL, VT, N0);
      }
    }
  }

  return SDValue();
}

/// This is a thin wrapper around TargetLowering::SimplifySetCC.
SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                                   ISD::CondCode Cond, const SDLoc &DL,
                                   bool foldBooleans) {
  TargetLowering::DAGCombinerInfo
    DagCombineInfo(DAG, Level, false, this);
  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
}

/// Given an ISD::SDIV node expressing a divide by constant, return
/// a DAG expression to select that will generate the same value by multiplying
/// by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
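/// For illustration only (the exact constants come from TLI.BuildSDIV, not
/// from here): a signed 32-bit divide by 7 is typically rewritten along the
/// lines of
///   q = mulhs(x, 0x92492493); q = add(q, x);
///   q = sra(q, 2);            q = add(q, srl(q, 31));
/// where 0x92492493 and the shift amount 2 are the "magic" values for 7
/// tabulated in Hacker's Delight.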
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
  // When optimizing for minimum size, we don't want to expand a div to a mul
  // and a shift.
  if (DAG.getMachineFunction().getFunction().optForMinSize())
    return SDValue();

  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
  if (!C)
    return SDValue();

  // Avoid division by zero.
  if (C->isNullValue())
    return SDValue();

  SmallVector<SDNode *, 8> Built;
  SDValue S =
      TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, Built);

  for (SDNode *N : Built)
    AddToWorklist(N);
  return S;
}

/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
/// DAG expression that will generate the same value by right shifting.
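/// For illustration (a sketch of the usual lowering, produced by
/// TLI.BuildSDIVPow2): a signed 32-bit divide by 8 must round toward zero, so
/// a plain "sra x, 3" is wrong for negative inputs and they are biased first:
///   t = srl(sra(x, 31), 29);   // 7 if x < 0, else 0
///   res = sra(add(x, t), 3);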
SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
  if (!C)
    return SDValue();

  // Avoid division by zero.
  if (C->isNullValue())
    return SDValue();

  SmallVector<SDNode *, 8> Built;
  SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built);

  for (SDNode *N : Built)
    AddToWorklist(N);
  return S;
}

/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
/// expression that will generate the same value by multiplying by a magic
/// number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
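/// For illustration only (the exact constants come from TLI.BuildUDIV, not
/// from here): an unsigned 32-bit divide by 7 is typically rewritten as
///   q = mulhu(x, 0x24924925);
///   t = srl(sub(x, q), 1);
///   res = srl(add(t, q), 2);
/// where the sub/srl/add fixup computes (x + q) >> 1 without overflowing
/// 32 bits, as described in Hacker's Delight.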
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
  // When optimizing for minimum size, we don't want to expand a div to a mul
  // and a shift.
  if (DAG.getMachineFunction().getFunction().optForMinSize())
    return SDValue();

  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
  if (!C)
    return SDValue();

  // Avoid division by zero.
  if (C->isNullValue())
    return SDValue();

  SmallVector<SDNode *, 8> Built;
  SDValue S =
      TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, Built);

  for (SDNode *N : Built)
    AddToWorklist(N);
  return S;
}

/// Determines the LogBase2 value for a non-null input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
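/// For example, with i32 elements and V = 16: ctlz(16) = 27, so
/// LogBase2 = (32 - 1) - 27 = 4, as expected for 2^4. More generally this
/// computes the position of the most significant set bit, i.e.
/// floor(log2(V)) for any non-zero V.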
SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
  EVT VT = V.getValueType();
  unsigned EltBits = VT.getScalarSizeInBits();
  SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
  SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
  SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
  return LogBase2;
}

/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal, we need to find the zero of the function:
///   F(X) = A X - 1 [which has a zero at X = 1/A]
///     =>
///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
///     does not require additional intermediate precision]
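/// As a concrete check of convergence: for A = 4 and initial estimate
/// X_0 = 0.2, X_1 = 0.2 * (2 - 4*0.2) = 0.24 and
/// X_2 = 0.24 * (2 - 4*0.24) = 0.2496, converging quadratically to 0.25.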
SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
  if (Level >= AfterLegalizeDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getDivRefinementSteps(VT, MF);
  if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
    AddToWorklist(Est.getNode());

    if (Iterations) {
      EVT VT = Op.getValueType();
      SDLoc DL(Op);
      SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);

      // Newton iterations: Est = Est + Est (1 - Arg * Est)
      for (int i = 0; i < Iterations; ++i) {
        SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
        AddToWorklist(Est.getNode());
      }
    }
    return Est;
  }

  return SDValue();
}

/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
///     =>
///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
/// As a result, we precompute A/2 prior to the iteration loop.
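/// As a concrete check: for A = 4 and initial estimate X_0 = 0.55,
/// X_1 = 0.55 * (1.5 - 4 * 0.55^2 / 2) = 0.55 * 0.895 = 0.49225,
/// already close to 1/sqrt(4) = 0.5.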
SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
                                         unsigned Iterations,
                                         SDNodeFlags Flags, bool Reciprocal) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);

  // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
  // this entire sequence requires only one FP constant.
  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
  AddToWorklist(HalfArg.getNode());

  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
  AddToWorklist(HalfArg.getNode());

  // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
  for (unsigned i = 0; i < Iterations; ++i) {
    SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
    AddToWorklist(NewEst.getNode());

    NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
    AddToWorklist(NewEst.getNode());

    NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
    AddToWorklist(NewEst.getNode());

    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
    AddToWorklist(Est.getNode());
  }

  // If non-reciprocal square root is requested, multiply the result by Arg.
  if (!Reciprocal) {
    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
    AddToWorklist(Est.getNode());
  }

  return Est;
}

/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
///     =>
///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
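/// Algebraically this is the same update as the one-constant form above:
///   (-0.5 * X_i) * (A * X_i^2 - 3.0) = X_i * (1.5 - A * X_i^2 / 2)
/// but it trades the on-the-fly computation of Arg/2 for a second FP
/// constant (-0.5); the target picks between the two forms via the
/// UseOneConstNR flag returned by getSqrtEstimate.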
SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
                                         unsigned Iterations,
                                         SDNodeFlags Flags, bool Reciprocal) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
  SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);

  // This routine must enter the loop below to work correctly
  // when (Reciprocal == false).
  assert(Iterations > 0 &&
         "sqrt refinement requires at least one Newton iteration");

  // Newton iterations for reciprocal square root:
  // E = (E * -0.5) * ((A * E) * E + -3.0)
  for (unsigned i = 0; i < Iterations; ++i) {
    SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
    AddToWorklist(AE.getNode());

    SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
    AddToWorklist(AEE.getNode());

    SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
    AddToWorklist(RHS.getNode());

    // When calculating a square root, on the last iteration build:
    //   S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
    // (note the common subexpression A * E)
    SDValue LHS;
    if (Reciprocal || (i + 1) < Iterations) {
      // RSQRT: LHS = (E * -0.5)
      LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
    } else {
      // SQRT: LHS = (A * E) * -0.5
      LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
    }
    AddToWorklist(LHS.getNode());

    Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
    AddToWorklist(Est.getNode());
  }

  return Est;
}

/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
/// Op can be zero.
SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
                                           bool Reciprocal) {
  if (Level >= AfterLegalizeDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getSqrtRefinementSteps(VT, MF);

  bool UseOneConstNR = false;
  if (SDValue Est =
      TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
                          Reciprocal)) {
    AddToWorklist(Est.getNode());

    if (Iterations) {
      Est = UseOneConstNR
            ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
            : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);

      if (!Reciprocal) {
        // The estimate is now completely wrong if the input was exactly 0.0 or
        // possibly a denormal. Force the answer to 0.0 for those cases.
        EVT VT = Op.getValueType();
        SDLoc DL(Op);
        EVT CCVT = getSetCCResultType(VT);
        ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
        const Function &F = DAG.getMachineFunction().getFunction();
        Attribute Denorms = F.getFnAttribute("denormal-fp-math");
        if (Denorms.getValueAsString().equals("ieee")) {
          // fabs(X) < SmallestNormal ? 0.0 : Est
          const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
          APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
          SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
          SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
          Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
          AddToWorklist(Fabs.getNode());
          AddToWorklist(IsDenorm.getNode());
          AddToWorklist(Est.getNode());
        } else {
          // X == 0.0 ? 0.0 : Est
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
          Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
          AddToWorklist(IsZero.getNode());
          AddToWorklist(Est.getNode());
        }
      }
    }
    return Est;
  }

  return SDValue();
}

SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, true);
}

SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, false);
}

/// Return true if there is any possibility that the two addresses overlap.
bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
  // If they are the same then they must be aliases.
  if (Op0->getBasePtr() == Op1->getBasePtr()) return true;

  // If they are both volatile then they cannot be reordered.
  if (Op0->isVolatile() && Op1->isVolatile()) return true;

  // If one operation reads from invariant memory and the other may store, they
  // cannot alias. This should really check the equivalent of mayWrite, but
  // that only matters for memory nodes other than load/store.
  if (Op0->isInvariant() && Op1->writeMem())
    return false;

  if (Op1->isInvariant() && Op0->writeMem())
    return false;

  unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
  unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();

  // Check for BaseIndexOffset matching.
  BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
  BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
  int64_t PtrDiff;
  if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
    if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
      return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));

    // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
    // able to calculate their relative offset if at least one arises
    // from an alloca. However, these allocas cannot overlap and we
    // can infer there is no alias.
    if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
      if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
        MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
        // If the bases are the same frame index but we couldn't find a
        // constant offset (the indices are different), be conservative.
        if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
                       !MFI.isFixedObjectIndex(B->getIndex())))
          return false;
      }

    bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
    bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
    bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
    bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
    bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
    bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());

    // If the bases are of mismatched kinds, or the indices match over
    // distinct checkable bases, we can prove they do not alias.
    if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
         (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
        (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
      return false;
  }

  // If both accesses are known to have relatively large alignment compared
  // to their size and offset, we may be able to prove they do not alias.
  // This check is conservative for now to catch cases created by splitting
  // vector types.
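  // For example, two 4-byte accesses that share an original alignment of 8
  // at source offsets 0 and 4 have OffAlign values 0 and 4, so the byte
  // ranges [0,4) and [4,8) cannot overlap within any aligned 8-byte window,
  // and the check below returns no-alias.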
  int64_t SrcValOffset0 = Op0->getSrcValueOffset();
  int64_t SrcValOffset1 = Op1->getSrcValueOffset();
  unsigned OrigAlignment0 = Op0->getOriginalAlignment();
  unsigned OrigAlignment1 = Op1->getOriginalAlignment();
  if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
      NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
    int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
    int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;

    // There is no overlap between these relatively aligned accesses of
    // similar size. Return no alias.
    if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
        (OffAlign1 + NumBytes1) <= OffAlign0)
      return false;
  }

  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
                   ? CombinerGlobalAA
                   : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif

  if (UseAA && AA &&
      Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
    // Use alias analysis information.
    int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
    int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
    int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
    AliasResult AAResult =
        AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
                                 UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
                  MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
                                 UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
    if (AAResult == NoAlias)
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}

/// Walk up the chain, skipping non-aliasing memory nodes, looking for
/// aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node.
  bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Look at each chain and determine if it is an alias.  If so, add it to the
  // aliases list.  If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we reach the depth limit.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    // Don't bother if we've been here before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    switch (Chain.getOpcode()) {
    case ISD::EntryToken:
      // Entry token is ideal chain operand, but handled in FindBetterChain.
      break;

    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for Chain.
      bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
          !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();

      // If the chain is an alias, stop here.
      if (!(IsLoad && IsOpLoad) &&
          isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
        Aliases.push_back(Chain);
      } else {
        // Look further up the chain.
        Chains.push_back(Chain.getOperand(0));
        ++Depth;
      }
      break;
    }

    case ISD::TokenFactor:
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases
      // the likelihood that getNode will find a matching token factor (CSE).
      if (Chain.getNumOperands() > 16) {
        Aliases.push_back(Chain);
        break;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      break;

    case ISD::CopyFromReg:
      // Forward past CopyFromReg.
      Chains.push_back(Chain.getOperand(0));
      ++Depth;
      break;

    default:
      // For all other instructions we will just have to take what we can get.
      Aliases.push_back(Chain);
      break;
    }
  }
}

/// Walk up the chain, skipping non-aliasing memory nodes, looking for a
/// better chain (an aliasing node).
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
  if (OptLevel == CodeGenOpt::None)
    return OldChain;

  // Ops for replacing token factor.
  SmallVector<SDValue, 8> Aliases;

  // Accumulate all the aliases to this node.
  GatherAllAliases(N, OldChain, Aliases);

  // If no operands then chain to entry token.
  if (Aliases.size() == 0)
    return DAG.getEntryNode();

  // If a single operand then chain to it.  We don't need to revisit it.
  if (Aliases.size() == 1)
    return Aliases[0];

  // Construct a custom tailored token factor.
  return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}

// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
// redundant, as this function gets called when visiting every store
// node, so why not let the work be done on each store as it's visited?
//
// I believe this is mainly important because MergeConsecutiveStores
// is unable to deal with merging stores of different sizes, so unless
// we improve the chains of all the potential candidates up-front
// before running MergeConsecutiveStores, it might only see some of
// the nodes that will eventually be candidates, and then not be able
// to go from a partially-merged state to the desired final
// fully-merged state.
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
  if (OptLevel == CodeGenOpt::None)
    return false;

  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return false;

  SmallVector<StoreSDNode *, 8> ChainedStores;
  ChainedStores.push_back(St);

  // Walk up the chain and look for nodes with offsets from the same
  // base pointer. Stop when reaching an instruction of a different kind
  // or one with a different base pointer.
  StoreSDNode *Index = St;
  while (Index) {
    // If the chain has more than one use, then we can't reorder the mem ops.
    if (Index != St && !SDValue(Index, 0)->hasOneUse())
      break;

    if (Index->isVolatile() || Index->isIndexed())
      break;

    // Find the base pointer and offset for this memory node.
    BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);

    // Check that the base pointer is the same as the original one.
    if (!BasePtr.equalBaseIndex(Ptr, DAG))
      break;

    // Walk up the chain to find the next store node, ignoring any
    // intermediate loads. Any other kind of node will halt the loop.
    SDNode *NextInChain = Index->getChain().getNode();
    while (true) {
      if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
        // We found a store node. Use it for the next iteration.
        if (STn->isVolatile() || STn->isIndexed()) {
          Index = nullptr;
          break;
        }
        ChainedStores.push_back(STn);
        Index = STn;
        break;
      } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
        NextInChain = Ldn->getChain().getNode();
        continue;
      } else {
        Index = nullptr;
        break;
      }
    } // end while
  }

  // At this point, ChainedStores lists all of the Store nodes
  // reachable by iterating up through chain nodes matching the above
  // conditions.  For each such store identified, try to find an
  // earlier chain to attach the store to which won't violate the
  // required ordering.
  bool MadeChangeToSt = false;
  SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;

  for (StoreSDNode *ChainedStore : ChainedStores) {
    SDValue Chain = ChainedStore->getChain();
    SDValue BetterChain = FindBetterChain(ChainedStore, Chain);

    if (Chain != BetterChain) {
      if (ChainedStore == St)
        MadeChangeToSt = true;
      BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
    }
  }

  // Do all replacements after finding the replacements to make to avoid making
  // the chains more complicated by introducing new TokenFactors.
  for (auto Replacement : BetterChains)
    replaceStoreChain(Replacement.first, Replacement.second);

  return MadeChangeToSt;
}

/// This is the entry point for the file.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
                           CodeGenOpt::Level OptLevel) {
  /// This is the main entry point to this class.
  DAGCombiner(*this, AA, OptLevel).Run(Level);
}